#include "llvm/IR/IntrinsicsPowerPC.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

    "ppc-quadword-atomics",

STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed,
          "Number of dynamic stack allocations probed");
  bool isPPC64 = Subtarget.isPPC64();

  if (!Subtarget.hasSPE()) {

  for (MVT VT : ScalarIntVTs) {

  if (isPPC64 || Subtarget.hasFPCVT()) {

  if (!Subtarget.hasSPE()) {

  if (TM.Options.UnsafeFPMath) {

    if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {

  if (TM.Options.UnsafeFPMath) {

  } else if (Subtarget.hasVSX()) {

  if (Subtarget.hasMMA()) {

void PPCTargetLowering::initializeAddrModeMap() {
  if (MaxAlign == MaxMaxAlign)
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
             MaxAlign < 16)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)

  return Alignment.value();
  return Subtarget.hasSPE();

    return "PPCISD::FP_TO_UINT_IN_VSR";
    return "PPCISD::FP_TO_SINT_IN_VSR";

    return "PPCISD::FTSQRT";
    return "PPCISD::FSQRT";

    return "PPCISD::XXSPLTI_SP_TO_DP";
    return "PPCISD::XXSPLTI32DX";

    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
    return "PPCISD::ANDI_rec_1_EQ_BIT";
    return "PPCISD::ANDI_rec_1_GT_BIT";

    return "PPCISD::ST_VSR_SCAL_INT";

    return "PPCISD::PADDI_DTPREL";

    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";

    return "PPCISD::STRICT_FADDRTZ";
    return "PPCISD::STRICT_FCTIDZ";
    return "PPCISD::STRICT_FCTIWZ";
    return "PPCISD::STRICT_FCTIDUZ";
    return "PPCISD::STRICT_FCTIWUZ";
    return "PPCISD::STRICT_FCFID";
    return "PPCISD::STRICT_FCFIDU";
    return "PPCISD::STRICT_FCFIDS";
    return "PPCISD::STRICT_FCFIDUS";
    return CFP->getValueAPF().isZero();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
    return CFP->getValueAPF().isZero();

static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
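// isConstantOrUndef: a shuffle-mask entry of -1 means "undef", which the
// mask predicates below treat as matching any expected value. E.g. the mask
// <0, -1, 4, -1, ...> still qualifies as a VPKUHUM pattern because the -1
// entries are unconstrained.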
  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; ++i)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; ++i)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
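// A note on the pattern being matched (not in the original source): VPKUHUM
// keeps one byte out of every halfword of the concatenated inputs, so a
// matching 16-entry mask steps by two, starting at byte 0 on little-endian
// and byte 1 on big-endian (the "j" above), e.g. <0,2,4,...,30> on LE.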
  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; i += 2)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; i += 2)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; i += 4)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; i += 4)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
                     unsigned LHSStart, unsigned RHSStart) {
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)
    for (unsigned j = 0; j != UnitSize; ++j) {
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))

  if (ShuffleKind == 1)
  else if (ShuffleKind == 2)

  if (ShuffleKind == 1)
  else if (ShuffleKind == 0)
                     unsigned RHSStartValue) {
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))

    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1)
    else if (ShuffleKind == 2)

    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1)
    else if (ShuffleKind == 0)
  if (i == 16)
    return -1;

  if (ShiftAmt < i)
    return -1;

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    for (++i; i != 16; ++i)
  } else if (ShuffleKind == 1) {
    for (++i; i != 16; ++i)

    ShiftAmt = 16 - ShiftAmt;
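// Illustration (editorial, not from the original): vsldoi concatenates the
// two inputs and shifts left by ShiftAmt bytes, so a mask like <4,5,...,19>
// encodes a 4-byte shift. On little-endian the bytes of each input are held
// in reverse order, which is why the amount is flipped to 16 - ShiftAmt.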
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  if (N->getMaskElt(0) % EltSize != 0)

  unsigned ElementBase = N->getMaskElt(0);

  if (ElementBase >= 16)

  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 ||
        N->getMaskElt(i) != (int)(i+ElementBase))

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0)
      continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
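// Example (editorial): a 4-byte splat of element 2 has the byte mask
// <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>. The first loop checks the
// leading unit is EltSize consecutive bytes; the second checks that every
// other unit repeats it.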
2144 "Unexpected element width.");
2145 assert((StepLen == 1 || StepLen == -1) &&
"Unexpected element width.");
2147 unsigned NumOfElem = 16 /
Width;
2148 unsigned MaskVal[16];
2149 for (
unsigned i = 0;
i < NumOfElem; ++
i) {
2150 MaskVal[0] =
N->getMaskElt(
i *
Width);
2151 if ((StepLen == 1) && (MaskVal[0] %
Width)) {
2153 }
else if ((StepLen == -1) && ((MaskVal[0] + 1) %
Width)) {
2157 for (
unsigned int j = 1;
j <
Width; ++
j) {
2158 MaskVal[
j] =
N->getMaskElt(
i *
Width +
j);
2159 if (MaskVal[
j] != MaskVal[
j-1] + StepLen) {
2169 unsigned &InsertAtByte,
bool &Swap,
bool IsLE) {
2174 unsigned M0 =
N->getMaskElt(0) / 4;
2175 unsigned M1 =
N->getMaskElt(4) / 4;
2176 unsigned M2 =
N->getMaskElt(8) / 4;
2177 unsigned M3 =
N->getMaskElt(12) / 4;
2178 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2179 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2184 if ((
M0 > 3 &&
M1 == 1 && M2 == 2 && M3 == 3) ||
2185 (
M0 < 4 &&
M1 == 5 && M2 == 6 && M3 == 7)) {
2186 ShiftElts = IsLE ? LittleEndianShifts[
M0 & 0x3] : BigEndianShifts[
M0 & 0x3];
2187 InsertAtByte = IsLE ? 12 : 0;
2192 if ((
M1 > 3 &&
M0 == 0 && M2 == 2 && M3 == 3) ||
2193 (
M1 < 4 &&
M0 == 4 && M2 == 6 && M3 == 7)) {
2194 ShiftElts = IsLE ? LittleEndianShifts[
M1 & 0x3] : BigEndianShifts[
M1 & 0x3];
2195 InsertAtByte = IsLE ? 8 : 4;
2200 if ((M2 > 3 &&
M0 == 0 &&
M1 == 1 && M3 == 3) ||
2201 (M2 < 4 &&
M0 == 4 &&
M1 == 5 && M3 == 7)) {
2202 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2203 InsertAtByte = IsLE ? 4 : 8;
2208 if ((M3 > 3 &&
M0 == 0 &&
M1 == 1 && M2 == 2) ||
2209 (M3 < 4 &&
M0 == 4 &&
M1 == 5 && M2 == 6)) {
2210 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2211 InsertAtByte = IsLE ? 0 : 12;
2218 if (
N->getOperand(1).isUndef()) {
2221 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2222 if (
M0 == XXINSERTWSrcElem &&
M1 == 1 && M2 == 2 && M3 == 3) {
2223 InsertAtByte = IsLE ? 12 : 0;
2226 if (
M0 == 0 &&
M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2227 InsertAtByte = IsLE ? 8 : 4;
2230 if (
M0 == 0 &&
M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2231 InsertAtByte = IsLE ? 4 : 8;
2234 if (
M0 == 0 &&
M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2235 InsertAtByte = IsLE ? 0 : 12;
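// Each case above recognizes "three words in place, one word replaced".
// Roughly (editorial sketch): on LE the word-level mask <4,1,2,3> means word
// 0 comes from the other vector, so the source is rotated by ShiftElts words
// to line up the wanted word, then inserted at InsertAtByte.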
2244 bool &Swap,
bool IsLE) {
2251 unsigned M0 =
N->getMaskElt(0) / 4;
2252 unsigned M1 =
N->getMaskElt(4) / 4;
2253 unsigned M2 =
N->getMaskElt(8) / 4;
2254 unsigned M3 =
N->getMaskElt(12) / 4;
2258 if (
N->getOperand(1).isUndef()) {
2259 assert(
M0 < 4 &&
"Indexing into an undef vector?");
2260 if (
M1 != (
M0 + 1) % 4 || M2 != (
M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2263 ShiftElts = IsLE ? (4 -
M0) % 4 :
M0;
2269 if (
M1 != (
M0 + 1) % 8 || M2 != (
M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2273 if (
M0 == 0 ||
M0 == 7 ||
M0 == 6 ||
M0 == 5) {
2278 ShiftElts = (8 -
M0) % 8;
2279 }
else if (
M0 == 4 ||
M0 == 3 ||
M0 == 2 ||
M0 == 1) {
2284 ShiftElts = (4 -
M0) % 4;
2289 if (
M0 == 0 ||
M0 == 1 ||
M0 == 2 ||
M0 == 3) {
2294 }
else if (
M0 == 4 ||
M0 == 5 ||
M0 == 6 ||
M0 == 7) {
  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
2343 bool &Swap,
bool IsLE) {
2350 unsigned M0 =
N->getMaskElt(0) / 8;
2351 unsigned M1 =
N->getMaskElt(8) / 8;
2352 assert(((
M0 |
M1) < 4) &&
"A mask element out of bounds?");
2356 if (
N->getOperand(1).isUndef()) {
2357 if ((
M0 |
M1) < 2) {
2358 DM = IsLE ? (((~
M1) & 1) << 1) + ((~
M0) & 1) : (
M0 << 1) + (
M1 & 1);
2366 if (
M0 > 1 &&
M1 < 2) {
2368 }
else if (M0 < 2 && M1 > 1) {
2376 DM = (((~
M1) & 1) << 1) + ((~
M0) & 1);
2379 if (M0 < 2 && M1 > 1) {
2381 }
else if (
M0 > 1 &&
M1 < 2) {
2389 DM = (
M0 << 1) + (
M1 & 1);
  return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;

    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef())
        continue;

      if (!isa<ConstantSDNode>(N->getOperand(i)))
        return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))

    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode())
        continue;

      if (!UniquedVals[Multiple-1].getNode())
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();

      if (!UniquedVals[Multiple-1].getNode())
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef())
      continue;

      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))

  unsigned ValSizeInBytes = EltSize;

    Value = CN->getZExtValue();

    assert(CN->getValueType(0) == MVT::f32 &&
           "Only one legal FP vector type!");

  if (ValSizeInBytes < ByteSize)
    return SDValue();

  if (MaskVal == 0)
    return SDValue();

  if (SignExtend32<5>(MaskVal) == MaskVal)
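// SignExtend32<5>(MaskVal) == MaskVal checks that the splat value fits in a
// signed 5-bit immediate (-16..15), the range accepted by the Altivec
// vspltisb/vspltish/vspltisw instructions.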
  if (!isa<ConstantSDNode>(N))

  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();

  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();

  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();

  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
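// The known-bits test above proves the OR is really an ADD: every bit
// position is known zero in at least one operand, so no carries can occur
// and (a | b) == (a + b). This lets address selection look through
// "or base, disp" patterns produced for aligned frame offsets.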
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
      if (Memop->getMemoryVT() == MVT::f64) {
        Base = N.getOperand(0);

  if (!isa<ConstantSDNode>(N))

  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
  return isInt<34>(Imm);
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))

      Base = N.getOperand(0);
  } else if (N.getOpcode() == ISD::OR) {
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))

    if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
      Base = N.getOperand(0);

        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {

      Base = N.getOperand(0);
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);

      Base = N.getOperand(0);
  } else if (N.getOpcode() == ISD::OR) {
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {

            dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {

        Base = N.getOperand(0);

        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
                             CN->getValueType(0));

    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

  Base = N.getOperand(0);

  Base = N.getOperand(0);

       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);

  Ty *PCRelCand = dyn_cast<Ty>(N);

  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
      isValidPCRelNode<GlobalAddressSDNode>(N) ||
      isValidPCRelNode<JumpTableSDNode>(N) ||
      isValidPCRelNode<BlockAddressSDNode>(N))
  EVT MemVT = LD->getMemoryVT();

    if (!ST.hasP8Vector())

    if (!ST.hasP9Vector())

      if (UI.getUse().get().getResNo() == 0 &&

    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();

    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();

  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))

    SDValue Val = cast<StoreSDNode>(N)->getValue();

      isa<ConstantSDNode>(Offset))
3018 unsigned &HiOpFlags,
unsigned &LoOpFlags,
3060 const bool Is64Bit = Subtarget.
isPPC64();
3075 EVT PtrVT =
Op.getValueType();
3091 return getTOCEntry(DAG,
SDLoc(
CP), GA);
3094 unsigned MOHiFlag, MOLoFlag;
3101 return getTOCEntry(DAG,
SDLoc(
CP), GA);
3161 EVT PtrVT =
Op.getValueType();
3179 return getTOCEntry(DAG,
SDLoc(
JT), GA);
3182 unsigned MOHiFlag, MOLoFlag;
3189 return getTOCEntry(DAG,
SDLoc(GA), GA);
3199 EVT PtrVT =
Op.getValueType();
3218 return getTOCEntry(DAG,
SDLoc(BASDN), GA);
3227 unsigned MOHiFlag, MOLoFlag;
3238 return LowerGlobalTLSAddressAIX(
Op, DAG);
3240 return LowerGlobalTLSAddressLinux(
Op, DAG);
  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);

  bool is64bit = Subtarget.isPPC64();

  if (!TM.isPositionIndependent())

                     PtrVT, GOTPtr, TGA, TGA);
                     PtrVT, TLSAddr, TGA);

  EVT PtrVT = Op.getValueType();

    return getTOCEntry(DAG, DL, GA);

  unsigned MOHiFlag, MOLoFlag;

    return getTOCEntry(DAG, DL, GA);
  bool IsStrict = Op->isStrictFPOpcode();
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
  SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
  SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);

         "SETCC for f128 is already legal under Power9!");

  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

    if (C->isAllOnesValue() || C->isNullValue())

  EVT VT = Op.getValueType();
  EVT VT = Node->getValueType(0);

  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();

  InChain = OverflowArea.getValue(1);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,

  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
  return Op.getOperand(0);

         "Expecting Inline ASM node.");

  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)

    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();

    for (; NumVals; --NumVals, ++i) {

      if (Reg != PPC::LR && Reg != PPC::LR8)
  bool isPPC64 = (PtrVT == MVT::i64);

  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;

  uint64_t FPROffset = 1;

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  uint64_t nextOffset = FPROffset;

  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,

  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  return DAG.getStore(thirdStore, dl, FR, nextPtr,

static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
                                       unsigned PtrByteSize) {
  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

                                         unsigned PtrByteSize) {
  Align Alignment(PtrByteSize);

    Alignment = Align(16);

  if (BVAlign > PtrByteSize) {
    if (BVAlign.value() % PtrByteSize != 0)
          "ByVal alignment is not a multiple of the pointer size");

    Alignment = BVAlign;

                                 unsigned PtrByteSize, unsigned LinkageSize,
                                 unsigned ParamAreaSize, unsigned &ArgOffset,
                                 unsigned &AvailableFPRs,
                                 unsigned &AvailableVRs) {
  bool UseMemory = false;

    ArgOffset = alignTo(ArgOffset, Alignment);

    if (ArgOffset >= LinkageSize + ParamAreaSize)

    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

    if (ArgOffset > LinkageSize + ParamAreaSize)

  if (AvailableFPRs > 0) {

  if (AvailableVRs > 0) {
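// ((ArgSize + PtrByteSize - 1) / PtrByteSize) * PtrByteSize is the usual
// round-up-to-multiple idiom: with 8-byte pointers a 13-byte by-value
// argument occupies ((13 + 7) / 8) * 8 = 16 bytes of parameter-save area.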
                                      unsigned NumBytes) {

SDValue PPCTargetLowering::LowerFormalArguments(

    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,

    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,

  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(

  const Align PtrAlign(4);

  CCInfo.AllocateStack(LinkageSize, PtrAlign);

  CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {

        RC = &PPC::GPRCRegClass;

          RC = &PPC::VSSRCRegClass;
        else if (Subtarget.hasSPE())
          RC = &PPC::GPRCRegClass;
          RC = &PPC::F4RCRegClass;

          RC = &PPC::VSFRCRegClass;
        else if (Subtarget.hasSPE())
          RC = &PPC::GPRCRegClass;
          RC = &PPC::F8RCRegClass;

        RC = &PPC::VRRCRegClass;

        RC = &PPC::VRRCRegClass;

        RC = &PPC::VRRCRegClass;

          assert(i + 1 < e && "No second half of double precision argument");
          unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);

      InVals.push_back(ArgValue);

      ArgOffset += ArgSize - ObjSize;

  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

      PPC::R7, PPC::R8, PPC::R9, PPC::R10,

      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,

    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
        CCInfo.getNextStackOffset(), true));

    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {

        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      MemOps.push_back(Store);

    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {

        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      MemOps.push_back(Store);

  if (!MemOps.empty())
                                  const SDLoc &dl) const {
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(

         "fastcc not supported on varargs functions");

  unsigned PtrByteSize = 8;

      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,

      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {

                              PtrByteSize, LinkageSize, ParamAreaSize,
                              NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {

    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;

    unsigned ArgSize = ObjSize;

    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();

    unsigned CurArgOffset;

    auto ComputeArgOffset = [&]() {

      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;

      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);

      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

        InVals.push_back(FIN);

      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)

      if (ObjSize < PtrByteSize) {

        if (!isLittleEndian) {

        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {

          MemOps.push_back(Store);

        ArgOffset += PtrByteSize;

      InVals.push_back(FIN);

      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);

        MemOps.push_back(Store);

      ArgOffset += ArgSize;

        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);

          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ArgSize = PtrByteSize;

      if (FPR_idx != Num_FPR_Regs) {

                             ? &PPC::VSSRCRegClass
                             : &PPC::F4RCRegClass);
                             ? &PPC::VSFRCRegClass
                             : &PPC::F8RCRegClass);

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);

        if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))

      ArgOffset += ArgSize;

      ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);

      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;

      InVals.push_back(ArgVal);

  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
    MinReservedArea = LinkageSize;

    int Depth = ArgOffset;

    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);

      MemOps.push_back(Store);

  if (!MemOps.empty())
                                   unsigned ParamSize) {

  if (!isTailCall)
    return 0;

  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;

  if (SPDiff < FI->getTailCallSPDelta())

      "PC Relative callers do not have a TOC and cannot share a TOC Base");

  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))

  const Function *F = dyn_cast<Function>(GV);
  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);

    F = dyn_cast<Function>(GlobalObj);

  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||

  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
  const unsigned PtrByteSize = 8;

      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,

      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

  const unsigned NumFPRs = 13;

  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

    if (Param.Flags.isNest())
      continue;

                             LinkageSize, ParamAreaSize, NumBytes,
                             AvailableFPRs, AvailableVRs))

  auto CalleeArgEnd = CB.arg_end();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value *CalleeArg = *CalleeArgIter;
    const Value *CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)

        isa<UndefValue>(CalleeArg))

  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(

  if (DisableSCO && !TailCallOpt)
    return false;

  if (isVarArg)
    return false;

  if (Caller.getCallingConv() != CalleeCC &&

PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,

  for (unsigned i = 0; i != Ins.size(); i++) {

    if (Flags.isByVal())
      return false;

  return G->getGlobal()->hasHiddenVisibility() ||
         G->getGlobal()->hasProtectedVisibility();

  if (!C)
    return nullptr;

  int Addr = C->getZExtValue();

  if ((Addr & 3) != 0 ||

                         (int)C->getZExtValue() >> 2, SDLoc(Op),
struct TailCallArgumentInfo {

  TailCallArgumentInfo() = default;

  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;

    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,

                                      int SPDiff, const SDLoc &dl) {

    bool isPPC64 = Subtarget.isPPC64();
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
                                                NewRetAddrLoc, true);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,

  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;

  TailCallArgumentInfo Info;

  Info.FrameIdxOp = FIN;

  TailCallArguments.push_back(Info);

SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(

    LROpOut = getReturnAddrFrameIndex(DAG);

  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,

                          SDValue PtrOff, int SPDiff, unsigned ArgOffset,
                          bool isPPC64,

    MemOpChains.push_back(

                        const SDLoc &dl, int SPDiff, unsigned NumBytes,
                        SDValue LROp,

  if (!MemOpChains2.empty())

  return G->getGlobal()->getValueType()->isFunctionTy();
SDValue PPCTargetLowering::LowerCallResult(

  CCRetInfo.AnalyzeCallResult(

  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {

      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);

      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);

    InVals.push_back(Val);

  auto isLocalCallee = [&]() {

           !dyn_cast_or_null<GlobalIFunc>(GV);

  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {

    assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");

    return getAIXFuncEntryPointSymbolSDNode(GV);

    const char *SymName = S->getSymbol();

      return getAIXFuncEntryPointSymbolSDNode(F);

  const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {

    SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5298 "Expected a CALLSEQ_STARTSDNode.");
5373 const unsigned Alignment = Subtarget.
isPPC64() ? 8 : 4;
5377 Alignment, MMOFlags);
5384 DAG.
getLoad(RegVT, dl, LDChain, AddTOC,
5391 DAG.
getLoad(RegVT, dl, LDChain, AddPtr,
5403 "Nest parameter is not supported on AIX.");
5419 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5422 const bool IsPPC64 = Subtarget.
isPPC64();
5427 Ops.push_back(Chain);
5451 Ops.push_back(AddTOC);
5462 Ops.push_back(DAG.
getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5471 for (
unsigned i = 0,
e = RegsToPass.size();
i !=
e; ++
i)
5473 RegsToPass[
i].second.getValueType()));
5490 assert(
Mask &&
"Missing call preserved mask for calling convention");
5495 Ops.push_back(Glue);
SDValue PPCTargetLowering::FinishCall(

  if (!CFlags.IsIndirect)
                           dl, CFlags.HasNest, Subtarget);

  if (CFlags.IsTailCall) {
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            isa<ConstantSDNode>(Callee) ||
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "

           "Unexpected call opcode for a tail call.");

  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,

    isTailCall = IsEligibleForTailCallOptimization_64SVR4(
        Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);

    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,

            isa<GlobalAddressSDNode>(Callee)) &&
           "Callee should be an llvm::Function object.");

             << "\nTCO callee: ");

         "site marked musttail");

      CallConv, isTailCall, isVarArg, isPatchPoint,

    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,

    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,

  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
SDValue PPCTargetLowering::LowerCall_32SVR4(

  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  const Align PtrAlign(4);

  CCInfo.PreAnalyzeCallOperands(Outs);

  unsigned NumArgs = Outs.size();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Outs[i].VT;

    if (Outs[i].IsFixed) {

      errs() << "Call operand #" << i << " has unhandled type "

  CCInfo.clearWasPPCF128();

  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  unsigned NumBytes = CCByValInfo.getNextStackOffset();

    Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  bool seenFloatArg = false;

  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {

      assert((j < ByValArgLocs.size()) && "Index out of bounds!");

      Chain = CallSeqStart = NewCallSeqStart;

        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),

        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));

      MemOpChains.push_back(

  if (!MemOpChains.empty())

  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
                             RegsToPass[i].second, InFlag);

    SDValue Ops[] = { Chain, InFlag };

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(

  return NewCallSeqStart;

SDValue PPCTargetLowering::LowerCall_64SVR4(

  unsigned NumOps = Outs.size();
  bool IsSibCall = false;

  unsigned PtrByteSize = 8;

  assert(!(IsFastCall && CFlags.IsVarArg) &&
         "fastcc not supported on varargs functions");

  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,

      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
  if (!HasParameterArea) {
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest())
        continue;

                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs))
        HasParameterArea = true;

  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

    HasParameterArea = false;

  for (unsigned i = 0; i != NumOps; ++i) {

    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

        if (NumGPRsUsed > NumGPRs)
          HasParameterArea = true;

        if (++NumGPRsUsed <= NumGPRs)

        if (++NumVRsUsed <= NumVRs)

        if (++NumVRsUsed <= NumVRs)

        if (++NumFPRsUsed <= NumFPRs)

      HasParameterArea = true;

      NumBytes = alignTo(NumBytes, Alignement);

    NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  unsigned NumBytesActuallyUsed = NumBytes;

  if (HasParameterArea)
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
    NumBytes = LinkageSize;
  if (CFlags.IsTailCall)

    Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  unsigned ArgOffset = LinkageSize;

  for (unsigned i = 0; i != NumOps; ++i) {

    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    auto ComputePtrOff = [&]() {

      ArgOffset = alignTo(ArgOffset, Alignment);

      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);

        if (GPR_idx != NumGPRs) {

          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;

      if (GPR_idx == NumGPRs && Size < 8) {

        if (!isLittleEndian) {

        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

        ArgOffset += PtrByteSize;

      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,

      if (Size < 8 && GPR_idx != NumGPRs) {

        if (!isLittleEndian) {

        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        ArgOffset += PtrByteSize;

      for (unsigned j = 0; j < Size; j += PtrByteSize) {

        if (GPR_idx != NumGPRs) {

          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;

          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;

    switch (Arg.getSimpleValueType().SimpleTy) {

        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));

      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");

                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);

          ArgOffset += PtrByteSize;

        ArgOffset += PtrByteSize;

      bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && !IsFastCall) {

        } else if (ArgOffset % PtrByteSize != 0) {

          if (!isLittleEndian)

          if (!isLittleEndian)

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");

                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (!IsFastCall || NeededLoad) {

        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

      if (CFlags.IsVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");

        MemOpChains.push_back(Store);

        if (VR_idx != NumVRs) {

          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));

        for (unsigned i = 0; i < 16; i += PtrByteSize) {
          if (GPR_idx == NumGPRs)

          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");

                         true, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);

  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())

  if (CFlags.IsIndirect) {

    assert(!CFlags.IsTailCall && "Indirect tail calls not supported");

  if (isELFv2ABI && !CFlags.IsPatchPoint)
    RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));

  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
                             RegsToPass[i].second, InFlag);

  if (CFlags.IsTailCall && !IsSibCall)

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
6492 "Required alignment greater than stack alignment.");
6512 return RequiredAlign <= 8;
6517 return RequiredAlign <= 4;
6527 const bool IsPPC64 = Subtarget.
isPPC64();
6539 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6541 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6542 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6546 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6547 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6552 "register width are not supported.");
6558 if (ByValSize == 0) {
6565 const unsigned StackSize =
alignTo(ByValSize, PtrAlign);
6587 assert(IsPPC64 &&
"PPC32 should have split i64 values.");
6594 LocInfo = ArgFlags.
isSExt() ? CCValAssign::LocInfo::SExt
6595 : CCValAssign::LocInfo::ZExt;
6616 for (
unsigned I = 0;
I < StoreSize;
I += PtrAlign.
value()) {
6617 if (
unsigned Reg = State.
AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6618 assert(FReg &&
"An FPR should be available when a GPR is reserved.");
6651 const unsigned VecSize = 16;
6652 const Align VecAlign(VecSize);
6669 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6675 while (NextRegIndex != GPRs.
size() &&
6680 assert(
Reg &&
"Allocating register unexpectedly failed.");
6693 for (
unsigned I = 0;
I != VecSize;
I += PtrSize)
6705 if (NextRegIndex == GPRs.
size()) {
6714 if (GPRs[NextRegIndex] == PPC::R9) {
6719 const unsigned FirstReg = State.
AllocateReg(PPC::R9);
6720 const unsigned SecondReg = State.
AllocateReg(PPC::R10);
6721 assert(FirstReg && SecondReg &&
6722 "Allocating R9 or R10 unexpectedly failed.");
6736 for (
unsigned I = 0;
I != VecSize;
I += PtrSize) {
6738 assert(
Reg &&
"Failed to allocated register for vararg vector argument");
6754 "i64 should have been split for 32-bit codegen.");
6762 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6764 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6766 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6774 return &PPC::VRRCRegClass;
6799 "Reg must be a valid argument register!");
6800 return LASize + 4 * (
Reg - PPC::R3);
6805 "Reg must be a valid argument register!");
6806 return LASize + 8 * (
Reg - PPC::X3);
SDValue PPCTargetLowering::LowerFormalArguments_AIX(

         "Unexpected calling convention!");

  const bool IsPPC64 = Subtarget.isPPC64();

  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  for (size_t I = 0, End = ArgLocs.size(); I != End; ) {

    auto HandleMemLoc = [&]() {

      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");

      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;

      const bool IsImmutable =

      InVals.push_back(ArgValue);

      assert(isVarArg && "Only use custom memloc for vararg.");

      const unsigned OriginalValNo = VA.getValNo();
      (void)OriginalValNo;

      auto HandleCustomVecRegLoc = [&]() {
        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Missing custom RegLoc.");

               "Unexpected Val type for custom RegLoc.");

               "ValNo mismatch between custom MemLoc and RegLoc.");

      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {

               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();

      const unsigned Size =

      InVals.push_back(FIN);

      InVals.push_back(FIN);

          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,

        const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);

            CopyFrom.getValue(1), dl, CopyFrom,

        MemOps.push_back(Store);

      for (; Offset != StackSize && ArgLocs[I].isRegLoc();

               "RegLocs should be for ByVal argument.");

      if (Offset != StackSize) {
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");

    InVals.push_back(ArgValue);

  const unsigned MinParameterSaveArea = 8 * PtrByteSize;

  unsigned CallerReservedArea =
      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);

    CallerReservedArea =

                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};

    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};

    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);

    for (unsigned GPRIndex =
             (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {

      const unsigned VReg =
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

      MemOps.push_back(Store);

  if (!MemOps.empty())
SDValue PPCTargetLowering::LowerCall_AIX(

         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)

  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,

  const bool IsPPC64 = Subtarget.isPPC64();

  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
                                     CCInfo.getNextStackOffset());

  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();

      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {

      unsigned LoadOffset = 0;

      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {

        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;

               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));

      if (LoadOffset == ByValSize)

      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");

        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            CallSeqStart, MemcpyFlags, DAG, dl);

      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");

      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {

        MemOpChains.push_back(Load.getValue(1));

               "Unexpected load emitted during handling of pass-by-value "

        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,

      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));

      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));

      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");

      MemOpChains.push_back(Store);

      const unsigned OriginalValNo = VA.getValNo();

      unsigned LoadOffset = 0;
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");

               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");

        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;

      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {

               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();

        MemOpChains.push_back(

           "Unexpected register handling for calling convention.");

           "Custom register handling only expected for VarArg.");

      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <

      RegsToPass.push_back(std::make_pair(

             "Unexpected custom register for argument!");

      RegsToPass.push_back(std::make_pair(

      RegsToPass.push_back(std::make_pair(

  if (!MemOpChains.empty())

  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");

    const unsigned TOCSaveOffset =

  for (auto Reg : RegsToPass) {

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
  return CCInfo.CheckReturn(

  CCInfo.AnalyzeReturn(Outs,

  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {

  RetOps.push_back(Flag);

PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,

  EVT IntVT = Op.getValueType();

  SDValue FPSIdx = getFramePointerFrameIndex(DAG);

  SDValue Ops[2] = {Chain, FPSIdx};

  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;

  bool isPPC64 = Subtarget.isPPC64();

PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {

  bool isPPC64 = Subtarget.isPPC64();

  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };

  bool isPPC64 = Subtarget.isPPC64();

                     Op.getOperand(0), Op.getOperand(1));

                     Op.getOperand(0), Op.getOperand(1));
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

         "Custom lowering only for i1 loads");

  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

         "Custom lowering only for i1 stores");

         "Custom lowering only for i1 results");

  EVT TrgVT = Op.getValueType();

  if (SrcSize > 256 ||

  if (SrcSize == 256) {

  Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);

    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);

  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(WideNumElts + 1);

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {

  bool IsStrict = Op->isStrictFPOpcode();

  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);

  switch (Op.getSimpleValueType().SimpleTy) {

           "i64 FP_TO_UINT is supported only with FPCVT");

                      {Chain, Src}, Flags);

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {

  bool IsStrict = Op->isStrictFPOpcode();

                 (IsSigned || Subtarget.hasFPCVT());

  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();

    Alignment = Align(4);

    SDValue Ops[] = { Chain, Tmp, FIPtr };

    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  if (Op.getValueType() == MVT::i32 && !i32Stack) {

  RLI.Alignment = Alignment;

                                          const SDLoc &dl) const {

  if (Op->isStrictFPOpcode())

                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();

  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);

  EVT DstVT = Op.getValueType();

                        {Op.getOperand(0), Lo, Hi}, Flags);

                        {Res.getValue(1), Res}, Flags);

  const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};

                        {Chain, Src, FltOfs}, Flags);

                        {Chain, Val}, Flags);

                   dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);

    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
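// Without direct moves, FP->int goes through memory: the value is converted
// in an FP register (fctiwz/fctidz and friends), stored to a stack slot, and
// reloaded as an integer. ReuseLoadInfo records that slot so a following
// load can reuse it instead of issuing a second store/load pair.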
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,

  if (Op->isStrictFPOpcode())

                              Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())

  if (LD->getMemoryVT() != MemVT)

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
           "Non-pre-inc AM on PPC?");

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);

void PPCTargetLowering::spliceIntoChain(SDValue ResChain,

  SDLoc dl(NewResChain);

         "A new TF really is required here");

bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(0).getNode();
    if (UI.getUse().get().getResNo() != 0)

  if (Op->isStrictFPOpcode()) {

    Chain = Op.getOperand(0);

  return DAG.getNode(ConvOpc, dl, ConvTy, Src);

                                                  const SDLoc &dl) const {

         "Invalid floating point type as target of conversion");

         "Int to FP conversions with direct moves require FPCVT");

  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);

  for (unsigned i = 1; i < NumConcat; ++i)

                                             const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);

         "Unexpected conversion type");

         "Supports conversions to v2f64/v4f32 only.");

  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;

  for (int i = 0; i < SaveElts; i++)
    ShuffV[i * Stride] = i;

  for (int i = 1; i <= SaveElts; i++)
    ShuffV[i * Stride - 1] = i - 1;

  Arrange = DAG.getBitcast(IntermediateVT, Arrange);

                       {Op.getOperand(0), Extend}, Flags);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);

  EVT OutVT = Op.getValueType();

    return LowerINT_TO_FPVector(Op, DAG, dl);

    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

         "UINT_TO_FP is supported only with FPCVT");

    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {

                       RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);

                          RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };

      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);

                          RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };

      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);

             "Expected an i32 store");

      RLI.Alignment = Align(4);

                          RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };

      Chain = Bits.getValue(1);

                     {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);

         "Unhandled INT_TO_FP type in custom expander!");

    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {

             "Expected an i32 store");

      RLI.Alignment = Align(4);

                        RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };

    spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);

           "i32->FP without LFIWAX supported only on PPC64");

        Chain, dl, Ext64, FIdx,

                   {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
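// The mirror image of FP_TO_INT above: when no direct GPR<->VSR moves are
// available, an i32 source is stored to a 4-byte stack slot and reloaded
// with lfiwax/lfiwzx (hence the Align(4) and "Expected an i32 store"
// checks), after which fcfid finishes the conversion in the FP unit.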
  EVT VT = Op.getValueType();

  Chain = MFFS.getValue(1);

         "Stack slot adjustment is valid only on big endian subtargets!");

  EVT VT = Op.getValueType();
         VT == Op.getOperand(1).getValueType() &&

  SDValue OutOps[] = { OutLo, OutHi };

  EVT VT = Op.getValueType();
         VT == Op.getOperand(1).getValueType() &&

  SDValue OutOps[] = { OutLo, OutHi };

  EVT VT = Op.getValueType();
         VT == Op.getOperand(1).getValueType() &&

  SDValue OutOps[] = { OutLo, OutHi };

  EVT VT = Op.getValueType();

  EVT AmtVT = Z.getValueType();

  static const MVT VTys[] = {

  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {

  EVT CanonicalVT = VTys[SplatSize-1];

  for (unsigned i = 0; i != 16; ++i)

  bool IsSplat = true;
  bool IsLoad = false;

  return !(IsSplat && IsLoad);
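// getCanonicalConstSplat picks the vector type from the splat width
// (VTys[SplatSize-1]: v16i8, v8i16, v4i32, ...). The check against
// (1 << (SplatSize * 8)) - 1 spots an all-ones value, which can always be
// emitted as a one-byte splat, since all-ones bytes form all-ones anything.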
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;

  ArgAPFloat = APFloatToConvert;

  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;

  return (!LosesInfo && !APFloatToConvert.isDenormal());

  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;

  bool BVNIsConstantSplat =

  if (BVNIsConstantSplat && (SplatBitSize == 64) &&

  if (!BVNIsConstantSplat || SplatBitSize > 32) {

    bool IsPermutedLoad = false;

      unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();

      unsigned NumUsesOfInputLD = 128 / ElementSize;

        if (BVInOp.isUndef())

      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");

          ((Subtarget.hasVSX() && ElementSize == 64) ||
           (Subtarget.hasP9Vector() && ElementSize == 32))) {

            Ops, LD->getMemoryVT(), LD->getMemOperand());
  unsigned SplatSize = SplatBitSize / 8;

  if (SplatBits == 0) {

                              Op.getValueType(), DAG, dl);

  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)

  if (SextVal >= -32 && SextVal <= 31) {

    if (VT == Op.getValueType())

  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF & ~SplatUndef)) {

  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16

    int i = SplatCsts[idx];

    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      static const unsigned IIDs[] = {
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw

    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      static const unsigned IIDs[] = {
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw

    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      static const unsigned IIDs[] = {
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw

    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {

    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {

    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");

    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;

    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;

    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;

    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;

    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;

    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
  const unsigned BytesInVector = 16;

  unsigned ShiftElts = 0, InsertAtByte = 0;

  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  bool FoundCandidate = false;

  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;

  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];

    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)

    bool OtherElementsInOrder = true;

    for (unsigned j = 0; j < BytesInVector; ++j) {

          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;

    if (OtherElementsInOrder) {

      ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                       : BigEndianShifts[CurrentElement & 0xF];
      Swap = CurrentElement < BytesInVector;

      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;

  if (!FoundCandidate)
9517 const unsigned NumHalfWords = 8;
9518 const unsigned BytesInVector = NumHalfWords * 2;
9527 unsigned ShiftElts = 0, InsertAtByte = 0;
9531 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9532 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9535 uint32_t OriginalOrderLow = 0x1234567;
9536 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9539 for (
unsigned i = 0;
i < NumHalfWords; ++
i) {
9540 unsigned MaskShift = (NumHalfWords - 1 -
i) * 4;
9557 bool FoundCandidate =
false;
9560 for (
unsigned i = 0;
i < NumHalfWords; ++
i) {
9561 unsigned MaskShift = (NumHalfWords - 1 -
i) * 4;
9563 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9571 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9572 TargetOrder = OriginalOrderLow;
9576 if (MaskOneElt == VINSERTHSrcElem &&
9577 (
Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9578 InsertAtByte = IsLE ? BytesInVector - (
i + 1) * 2 :
i * 2;
9579 FoundCandidate =
true;
9585 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9587 if ((
Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9589 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9590 : BigEndianShifts[MaskOneElt & 0x7];
9591 InsertAtByte = IsLE ? BytesInVector - (
i + 1) * 2 :
i * 2;
9592 Swap = MaskOneElt < NumHalfWords;
9593 FoundCandidate =
true;
9599 if (!FoundCandidate)
9634 auto ShuffleMask = SVN->
getMask();
9646 ShuffleMask = cast<ShuffleVectorSDNode>(
VecShuffle)->getMask();
9655 APInt APSplatValue, APSplatUndef;
9656 unsigned SplatBitSize;
9672 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9673 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9674 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9676 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9677 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9678 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9686 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9687 SplatVal |= (SplatVal << SplatBitSize);
9702 "Only set v1i128 as custom, other type shouldn't reach here!");
9707 if (SHLAmt % 8 == 0) {
9709 std::iota(
Mask.begin(),
Mask.end(), 0);
9739 if (
SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9740 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9743 SVOp = cast<ShuffleVectorSDNode>(
Op);
9744 V1 =
Op.getOperand(0);
9745 V2 =
Op.getOperand(1);
9747 EVT VT =
Op.getValueType();
9750 unsigned ShiftElts, InsertAtByte;
9756 bool IsPermutedLoad =
false;
9758 if (InputLoad && Subtarget.
hasVSX() &&
V2.isUndef() &&
9768 if (IsPermutedLoad) {
9769 assert((isLittleEndian || IsFourByte) &&
9770 "Unexpected size for permuted load on big endian target");
9771 SplatIdx += IsFourByte ? 2 : 1;
9772 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9773 "Splat of a value outside of the loaded memory");
9778 if ((IsFourByte && Subtarget.
hasP9Vector()) || !IsFourByte) {
9781 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9783 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9787 if (
LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
9803 Ops,
LD->getMemoryVT(),
LD->getMemOperand());
9831 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9832 return SplatInsertNode;
9837 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9840 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9844 if (Subtarget.
hasVSX() &&
9857 if (Subtarget.
hasVSX() &&
9890 if (Subtarget.
hasVSX()) {
9935 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9955 unsigned PFIndexes[4];
9956 bool isFourElementShuffle =
true;
9957 for (
unsigned i = 0;
i != 4 && isFourElementShuffle; ++
i) {
9959 for (
unsigned j = 0;
j != 4; ++
j) {
9960 if (PermMask[
i*4+
j] < 0)
9963 unsigned ByteSource = PermMask[
i*4+
j];
9964 if ((ByteSource & 3) !=
j) {
9965 isFourElementShuffle =
false;
9970 EltNo = ByteSource/4;
9971 }
else if (EltNo != ByteSource/4) {
9972 isFourElementShuffle =
false;
9976 PFIndexes[
i] = EltNo;
9984 if (isFourElementShuffle && !isLittleEndian) {
9986 unsigned PFTableIndex =
9987 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9990 unsigned Cost = (PFEntry >> 30);
10009 if (
V2.isUndef())
V2 = V1;
10023 unsigned SrcElt = PermMask[
i] < 0 ? 0 : PermMask[
i];
10025 for (
unsigned j = 0;
j != BytesPerElement; ++
j)
10026 if (isLittleEndian)
10027 ResultMask.push_back(DAG.
getConstant(31 - (SrcElt*BytesPerElement +
j),
10030 ResultMask.push_back(DAG.
getConstant(SrcElt*BytesPerElement +
j, dl,
10034 ShufflesHandledWithVPERM++;
10036 LLVM_DEBUG(
dbgs() <<
"Emitting a VPERM for the following shuffle:\n");
10038 LLVM_DEBUG(
dbgs() <<
"With the following permute control vector:\n");
10041 if (isLittleEndian)
10043 V2, V1, VPermMask);
10046 V1,
V2, VPermMask);
10054 unsigned IntrinsicID =
10055 cast<ConstantSDNode>(Intrin.
getOperand(0))->getZExtValue();
10058 switch (IntrinsicID) {
10062 case Intrinsic::ppc_altivec_vcmpbfp_p:
10066 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10070 case Intrinsic::ppc_altivec_vcmpequb_p:
10074 case Intrinsic::ppc_altivec_vcmpequh_p:
10078 case Intrinsic::ppc_altivec_vcmpequw_p:
10082 case Intrinsic::ppc_altivec_vcmpequd_p:
10089 case Intrinsic::ppc_altivec_vcmpneb_p:
10090 case Intrinsic::ppc_altivec_vcmpneh_p:
10091 case Intrinsic::ppc_altivec_vcmpnew_p:
10092 case Intrinsic::ppc_altivec_vcmpnezb_p:
10093 case Intrinsic::ppc_altivec_vcmpnezh_p:
10094 case Intrinsic::ppc_altivec_vcmpnezw_p:
10096 switch (IntrinsicID) {
10099 case Intrinsic::ppc_altivec_vcmpneb_p:
10102 case Intrinsic::ppc_altivec_vcmpneh_p:
10105 case Intrinsic::ppc_altivec_vcmpnew_p:
10108 case Intrinsic::ppc_altivec_vcmpnezb_p:
10111 case Intrinsic::ppc_altivec_vcmpnezh_p:
10114 case Intrinsic::ppc_altivec_vcmpnezw_p:
10122 case Intrinsic::ppc_altivec_vcmpgefp_p:
10126 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10130 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10134 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10138 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10142 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10149 case Intrinsic::ppc_altivec_vcmpgtub_p:
10153 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10157 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10161 case Intrinsic::ppc_altivec_vcmpgtud_p:
10169 case Intrinsic::ppc_altivec_vcmpequq:
10170 case Intrinsic::ppc_altivec_vcmpgtsq:
10171 case Intrinsic::ppc_altivec_vcmpgtuq:
10174 switch (IntrinsicID) {
10177 case Intrinsic::ppc_altivec_vcmpequq:
10180 case Intrinsic::ppc_altivec_vcmpgtsq:
10183 case Intrinsic::ppc_altivec_vcmpgtuq:
10190 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10191 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10192 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10193 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10194 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10195 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10196 if (Subtarget.
hasVSX()) {
10197 switch (IntrinsicID) {
10198 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10201 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10204 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10207 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10210 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10213 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10223 case Intrinsic::ppc_altivec_vcmpbfp:
10226 case Intrinsic::ppc_altivec_vcmpeqfp:
10229 case Intrinsic::ppc_altivec_vcmpequb:
10232 case Intrinsic::ppc_altivec_vcmpequh:
10235 case Intrinsic::ppc_altivec_vcmpequw:
10238 case Intrinsic::ppc_altivec_vcmpequd:
10244 case Intrinsic::ppc_altivec_vcmpneb:
10245 case Intrinsic::ppc_altivec_vcmpneh:
10246 case Intrinsic::ppc_altivec_vcmpnew:
10247 case Intrinsic::ppc_altivec_vcmpnezb:
10248 case Intrinsic::ppc_altivec_vcmpnezh:
10249 case Intrinsic::ppc_altivec_vcmpnezw:
10251 switch (IntrinsicID) {
10254 case Intrinsic::ppc_altivec_vcmpneb:
10257 case Intrinsic::ppc_altivec_vcmpneh:
10260 case Intrinsic::ppc_altivec_vcmpnew:
10263 case Intrinsic::ppc_altivec_vcmpnezb:
10266 case Intrinsic::ppc_altivec_vcmpnezh:
10269 case Intrinsic::ppc_altivec_vcmpnezw:
10276 case Intrinsic::ppc_altivec_vcmpgefp:
10279 case Intrinsic::ppc_altivec_vcmpgtfp:
10282 case Intrinsic::ppc_altivec_vcmpgtsb:
10285 case Intrinsic::ppc_altivec_vcmpgtsh:
10288 case Intrinsic::ppc_altivec_vcmpgtsw:
10291 case Intrinsic::ppc_altivec_vcmpgtsd:
10297 case Intrinsic::ppc_altivec_vcmpgtub:
10300 case Intrinsic::ppc_altivec_vcmpgtuh:
10303 case Intrinsic::ppc_altivec_vcmpgtuw:
10306 case Intrinsic::ppc_altivec_vcmpgtud:
10312 case Intrinsic::ppc_altivec_vcmpequq_p:
10313 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10314 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10317 switch (IntrinsicID) {
10320 case Intrinsic::ppc_altivec_vcmpequq_p:
10323 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10326 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10340 unsigned IntrinsicID =
10341 cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
10345 switch (IntrinsicID) {
10346 case Intrinsic::thread_pointer:
10352 case Intrinsic::ppc_mma_disassemble_acc:
10353 case Intrinsic::ppc_vsx_disassemble_pair: {
10356 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10361 for (
int VecNo = 0; VecNo < NumVecs; VecNo++) {
10367 RetOps.push_back(Extract);
10383 Op.getOperand(1),
Op.getOperand(2),
10406 switch (cast<ConstantSDNode>(
Op.getOperand(1))->getZExtValue()) {
10409 BitNo = 0; InvertBit =
false;
10412 BitNo = 0; InvertBit =
true;
10415 BitNo = 2; InvertBit =
false;
10418 BitNo = 2; InvertBit =
true;
10440 int ArgStart = isa<ConstantSDNode>(
Op.getOperand(0)) ? 0 : 1;
10442 switch (cast<ConstantSDNode>(
Op.getOperand(ArgStart))->getZExtValue()) {
10443 case Intrinsic::ppc_cfence: {
10444 assert(ArgStart == 1 &&
"llvm.ppc.cfence must carry a chain argument.");
10445 assert(Subtarget.
isPPC64() &&
"Only 64-bit is supported for now.");
10448 Op.getOperand(ArgStart + 1)),
10469 int VectorIndex = 0;
10482 "Expecting an atomic compare-and-swap here.");
10484 auto *AtomicNode = cast<AtomicSDNode>(
Op.getNode());
10485 EVT MemVT = AtomicNode->getMemoryVT();
10503 for (
int i = 0,
e = AtomicNode->getNumOperands();
i <
e;
i++)
10504 Ops.push_back(AtomicNode->getOperand(
i));
10532 "Should only be called for ISD::INSERT_VECTOR_ELT");
10536 EVT VT =
Op.getValueType();
10572 unsigned InsertAtElement =
C->getZExtValue();
10573 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10575 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10589 EVT VT =
Op.getValueType();
10598 "Type unsupported without MMA");
10600 "Type unsupported without paired vector support");
10605 for (
unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10613 Loads.push_back(
Load);
10614 LoadChains.push_back(
Load.getValue(1));
10635 EVT StoreVT =
Value.getValueType();
10644 "Type unsupported without MMA");
10646 "Type unsupported without paired vector support");
10649 unsigned NumVecs = 2;
10654 for (
unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10655 unsigned VecNum = Subtarget.
isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10659 DAG.
getStore(StoreChain, dl, Elt, BasePtr,
10665 Stores.push_back(
Store);
10674 SDValue LHS =
Op.getOperand(0), RHS =
Op.getOperand(1);
10699 SDValue LHS =
Op.getOperand(0), RHS =
Op.getOperand(1);
10717 for (
unsigned i = 0;
i != 8; ++
i) {
10718 if (isLittleEndian) {
10720 Ops[
i*2+1] = 2*
i+16;
10723 Ops[
i*2+1] = 2*
i+1+16;
10726 if (isLittleEndian)
10736 bool IsStrict =
Op->isStrictFPOpcode();
10737 if (
Op.getOperand(IsStrict ? 1 : 0).getValueType() ==
MVT::f128 &&
10748 "Should only be called for ISD::FP_EXTEND");
10765 "Node should have 2 operands with second one being a constant!");
10771 int Idx = cast<ConstantSDNode>(Op0.
getOperand(1))->getZExtValue();
10777 int DWord = Idx >> 1;
10797 SDValue LoadOps[] = {
LD->getChain(),
LD->getBasePtr()};
10800 LD->getMemoryVT(),
LD->getMemOperand());
10810 SDValue LoadOps[] = {
LD->getChain(),
LD->getBasePtr()};
10813 LD->getMemoryVT(),
LD->getMemOperand());
10824 switch (
Op.getOpcode()) {
10847 return LowerGET_DYNAMIC_AREA_OFFSET(
Op, DAG);
10873 case ISD::FSHL:
return LowerFunnelShift(
Op, DAG);
10874 case ISD::FSHR:
return LowerFunnelShift(
Op, DAG);
10886 return LowerFP_ROUND(
Op, DAG);
10899 return LowerINTRINSIC_VOID(
Op, DAG);
10901 return LowerBSWAP(
Op, DAG);
10903 return LowerATOMIC_CMP_SWAP(
Op, DAG);
10911 switch (
N->getOpcode()) {
10913 llvm_unreachable(
"Do not know how to custom type legalize this operation!");
10924 if (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue() !=
10925 Intrinsic::loop_decrement)
10929 "Unexpected result type for CTR decrement intrinsic");
10931 N->getValueType(0));
10944 EVT VT =
N->getValueType(0);
10959 if (
N->getOperand(
N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
10965 if (!
N->getValueType(0).isVector())
10994 return Builder.CreateCall(Func, {});
11002 if (Ord == AtomicOrdering::SequentiallyConsistent)
11016 if (isa<LoadInst>(Inst) && Subtarget.
isPPC64())
11019 Builder.GetInsertBlock()->getParent()->getParent(),
11020 Intrinsic::ppc_cfence, {Inst->getType()}),
11030 unsigned AtomicSize,
11031 unsigned BinOpcode,
11032 unsigned CmpOpcode,
11033 unsigned CmpPred)
const {
11037 auto LoadMnemonic = PPC::LDARX;
11038 auto StoreMnemonic = PPC::STDCX;
11039 switch (AtomicSize) {
11043 LoadMnemonic = PPC::LBARX;
11044 StoreMnemonic = PPC::STBCX;
11048 LoadMnemonic = PPC::LHARX;
11049 StoreMnemonic = PPC::STHCX;
11053 LoadMnemonic = PPC::LWARX;
11054 StoreMnemonic = PPC::STWCX;
11057 LoadMnemonic = PPC::LDARX;
11058 StoreMnemonic = PPC::STDCX;
11074 CmpOpcode ?
F->CreateMachineBasicBlock(LLVM_BB) :
nullptr;
11076 F->insert(It, loopMBB);
11078 F->insert(It, loop2MBB);
11079 F->insert(It, exitMBB);
11085 Register TmpReg = (!BinOpcode) ? incr :
11087 : &PPC::GPRCRegClass);
11092 BB->addSuccessor(loopMBB);
11118 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11120 BuildMI(
BB, dl,
TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11130 BB->addSuccessor(loop2MBB);
11131 BB->addSuccessor(exitMBB);
11138 BB->addSuccessor(loopMBB);
11139 BB->addSuccessor(exitMBB);
11148 switch(
MI.getOpcode()) {
11152 return TII->isSignExtended(
MI);
11176 case PPC::EXTSB8_32_64:
11177 case PPC::EXTSB8_rec:
11178 case PPC::EXTSB_rec:
11181 case PPC::EXTSH8_32_64:
11182 case PPC::EXTSH8_rec:
11183 case PPC::EXTSH_rec:
11186 case PPC::EXTSWSLI_32_64:
11187 case PPC::EXTSWSLI_32_64_rec:
11188 case PPC::EXTSWSLI_rec:
11189 case PPC::EXTSW_32:
11190 case PPC::EXTSW_32_64:
11191 case PPC::EXTSW_32_64_rec:
11192 case PPC::EXTSW_rec:
11195 case PPC::SRAWI_rec:
11196 case PPC::SRAW_rec:
11205 unsigned BinOpcode,
unsigned CmpOpcode,
unsigned CmpPred)
const {
11218 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11220 BuildMI(*
BB,
MI, dl,
TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11221 .
addReg(
MI.getOperand(3).getReg());
11222 MI.getOperand(3).setReg(ValueReg);
11233 bool is64bit = Subtarget.
isPPC64();
11235 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11246 CmpOpcode ?
F->CreateMachineBasicBlock(LLVM_BB) :
nullptr;
11248 F->insert(It, loopMBB);
11250 F->insert(It, loop2MBB);
11251 F->insert(It, exitMBB);
11257 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11280 BB->addSuccessor(loopMBB);
11302 if (ptrA != ZeroReg) {
11304 BuildMI(
BB, dl,
TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11313 .
addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11316 .
addImm(is8bit ? 28 : 27);
11317 if (!isLittleEndian)
11320 .
addImm(is8bit ? 24 : 16);
11364 unsigned ValueReg = SReg;
11365 unsigned CmpReg = Incr2Reg;
11366 if (CmpOpcode == PPC::CMPW) {
11372 BuildMI(
BB, dl,
TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11374 ValueReg = ValueSReg;
11384 BB->addSuccessor(loop2MBB);
11385 BB->addSuccessor(exitMBB);
11397 BB->addSuccessor(loopMBB);
11398 BB->addSuccessor(exitMBB);
11408 .
addImm(is8bit ? 24 : 16)
11429 Register DstReg =
MI.getOperand(0).getReg();
11437 "Invalid Pointer Size!");
11486 Register BufReg =
MI.getOperand(1).getReg();
11501 BaseReg = Subtarget.
isPPC64() ? PPC::X1 : PPC::R1;
11503 BaseReg = Subtarget.
isPPC64() ? PPC::BP8 : PPC::BP;
11506 TII->get(Subtarget.
isPPC64() ? PPC::STD : PPC::STW))
11529 TII->get(Subtarget.
isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11550 TII->get(PPC::PHI), DstReg)
11554 MI.eraseFromParent();
11569 "Invalid Pointer Size!");
11572 (PVT ==
MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11575 unsigned FP = (PVT ==
MVT::i64) ? PPC::X31 : PPC::R31;
11576 unsigned SP = (PVT ==
MVT::i64) ? PPC::X1 : PPC::R1;
11590 Register BufReg =
MI.getOperand(0).getReg();
11656 MI.eraseFromParent();
11672 "Unexpected stack alignment");
11675 unsigned StackProbeSize = 4096;
11683 return StackProbeSize ? StackProbeSize :
StackAlign;
11695 const bool isPPC64 = Subtarget.
isPPC64();
11727 MF->
insert(MBBIter, TestMBB);
11728 MF->
insert(MBBIter, BlockMBB);
11729 MF->
insert(MBBIter, TailMBB);
11734 Register DstReg =
MI.getOperand(0).getReg();
11735 Register NegSizeReg =
MI.getOperand(1).getReg();
11736 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11747 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11753 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11754 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11756 .
addDef(ActualNegSizeReg)
11758 .
add(
MI.getOperand(2))
11759 .
add(
MI.getOperand(3));
11765 .
addReg(ActualNegSizeReg);
11768 int64_t NegProbeSize = -(int64_t)ProbeSize;
11774 .
addImm(NegProbeSize >> 16);
11778 .
addImm(NegProbeSize & 0xFFFF);
11787 .
addReg(ActualNegSizeReg)
11796 .
addReg(ActualNegSizeReg);
11806 BuildMI(TestMBB,
DL,
TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11820 BuildMI(BlockMBB,
DL,
TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11834 MaxCallFrameSizeReg)
11835 .
add(
MI.getOperand(2))
11836 .
add(
MI.getOperand(3));
11837 BuildMI(TailMBB,
DL,
TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11839 .
addReg(MaxCallFrameSizeReg);
11848 MI.eraseFromParent();
11850 ++NumDynamicAllocaProbed;
11857 if (
MI.getOpcode() == TargetOpcode::STACKMAP ||
11858 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11860 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11873 if (
MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11874 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11876 }
else if (
MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11877 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11890 if (
MI.getOpcode() == PPC::SELECT_CC_I4 ||
11891 MI.getOpcode() == PPC::SELECT_CC_I8 ||
MI.getOpcode() == PPC::SELECT_I4 ||
11892 MI.getOpcode() == PPC::SELECT_I8) {
11894 if (
MI.getOpcode() == PPC::SELECT_CC_I4 ||
11895 MI.getOpcode() == PPC::SELECT_CC_I8)
11896 Cond.push_back(
MI.getOperand(4));
11899 Cond.push_back(
MI.getOperand(1));
11902 TII->insertSelect(*
BB,
MI, dl,
MI.getOperand(0).getReg(),
Cond,
11903 MI.getOperand(2).getReg(),
MI.getOperand(3).getReg());
11904 }
else if (
MI.getOpcode() == PPC::SELECT_CC_F4 ||
11905 MI.getOpcode() == PPC::SELECT_CC_F8 ||
11906 MI.getOpcode() == PPC::SELECT_CC_F16 ||
11907 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11908 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11909 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11910 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11911 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11912 MI.getOpcode() == PPC::SELECT_CC_SPE ||
11913 MI.getOpcode() == PPC::SELECT_F4 ||
11914 MI.getOpcode() == PPC::SELECT_F8 ||
11915 MI.getOpcode() == PPC::SELECT_F16 ||
11916 MI.getOpcode() == PPC::SELECT_SPE ||
11917 MI.getOpcode() == PPC::SELECT_SPE4 ||
11918 MI.getOpcode() == PPC::SELECT_VRRC ||
11919 MI.getOpcode() == PPC::SELECT_VSFRC ||
11920 MI.getOpcode() == PPC::SELECT_VSSRC ||
11921 MI.getOpcode() == PPC::SELECT_VSRC) {
11936 F->insert(It, copy0MBB);
11937 F->insert(It, sinkMBB);
11945 BB->addSuccessor(copy0MBB);
11946 BB->addSuccessor(sinkMBB);
11948 if (
MI.getOpcode() == PPC::SELECT_I4 ||
MI.getOpcode() == PPC::SELECT_I8 ||
11949 MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
11950 MI.getOpcode() == PPC::SELECT_F16 ||
11951 MI.getOpcode() == PPC::SELECT_SPE4 ||
11952 MI.getOpcode() == PPC::SELECT_SPE ||
11953 MI.getOpcode() == PPC::SELECT_VRRC ||
11954 MI.getOpcode() == PPC::SELECT_VSFRC ||
11955 MI.getOpcode() == PPC::SELECT_VSSRC ||
11956 MI.getOpcode() == PPC::SELECT_VSRC) {
11958 .
addReg(
MI.getOperand(1).getReg())
11961 unsigned SelectPred =
MI.getOperand(4).getImm();
11964 .
addReg(
MI.getOperand(1).getReg())
11974 BB->addSuccessor(sinkMBB);
11980 BuildMI(*
BB,
BB->begin(), dl,
TII->get(PPC::PHI),
MI.getOperand(0).getReg())
11981 .
addReg(
MI.getOperand(3).getReg())
11983 .
addReg(
MI.getOperand(2).getReg())
11985 }
else if (
MI.getOpcode() == PPC::ReadTB) {
12001 F->insert(It, readMBB);
12002 F->insert(It, sinkMBB);
12009 BB->addSuccessor(readMBB);
12031 BB->addSuccessor(readMBB);
12032 BB->addSuccessor(sinkMBB);
12033 }
else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12035 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12037 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12039 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12042 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12044 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12046 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12048 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12051 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12053 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12055 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12057 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12060 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12062 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12064 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12066 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12069 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12071 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12073 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12075 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12078 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12080 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12082 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12084 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12087 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12089 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12091 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12093 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12096 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12098 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12100 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12102 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12105 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12107 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12109 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12111 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12114 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12116 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12118 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12120 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12123 else if (
MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12125 else if (
MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12127 else if (
MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12129 else if (
MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12131 else if (
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12132 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12134 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12136 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12137 bool is64bit =
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12139 auto LoadMnemonic = PPC::LDARX;
12140 auto StoreMnemonic = PPC::STDCX;
12141 switch (
MI.getOpcode()) {
12144 case PPC::ATOMIC_CMP_SWAP_I8:
12145 LoadMnemonic = PPC::LBARX;
12146 StoreMnemonic = PPC::STBCX;
12149 case PPC::ATOMIC_CMP_SWAP_I16:
12150 LoadMnemonic = PPC::LHARX;
12151 StoreMnemonic = PPC::STHCX;
12154 case PPC::ATOMIC_CMP_SWAP_I32:
12155 LoadMnemonic = PPC::LWARX;
12156 StoreMnemonic = PPC::STWCX;
12158 case PPC::ATOMIC_CMP_SWAP_I64:
12159 LoadMnemonic = PPC::LDARX;
12160 StoreMnemonic = PPC::STDCX;
12166 Register oldval =
MI.getOperand(3).getReg();
12167 Register newval =
MI.getOperand(4).getReg();
12174 F->insert(It, loop1MBB);
12175 F->insert(It, loop2MBB);
12176 F->insert(It, midMBB);
12177 F->insert(It, exitMBB);
12185 BB->addSuccessor(loop1MBB);
12200 BuildMI(
BB, dl,
TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12207 BB->addSuccessor(loop2MBB);
12208 BB->addSuccessor(midMBB);
12220 BB->addSuccessor(loop1MBB);
12221 BB->addSuccessor(exitMBB);
12228 BB->addSuccessor(exitMBB);
12233 }
else if (
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12234 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12238 bool is64bit = Subtarget.
isPPC64();
12240 bool is8bit =
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12245 Register oldval =
MI.getOperand(3).getReg();
12246 Register newval =
MI.getOperand(4).getReg();
12253 F->insert(It, loop1MBB);
12254 F->insert(It, loop2MBB);
12255 F->insert(It, midMBB);
12256 F->insert(It, exitMBB);
12263 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12282 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12286 BB->addSuccessor(loop1MBB);
12315 if (ptrA != ZeroReg) {
12317 BuildMI(
BB, dl,
TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12327 .
addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12330 .
addImm(is8bit ? 28 : 27);
12331 if (!isLittleEndian)
12334 .
addImm(is8bit ? 24 : 16);
12384 BB->addSuccessor(loop2MBB);
12385 BB->addSuccessor(midMBB);
12403 BB->addSuccessor(loop1MBB);
12404 BB->addSuccessor(exitMBB);
12411 BB->addSuccessor(exitMBB);
12419 }
else if (
MI.getOpcode() == PPC::FADDrtz) {
12454 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12455 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12456 unsigned Opcode = (
MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12457 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12461 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12465 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12469 .
addReg(
MI.getOperand(1).getReg())
12472 MI.getOperand(0).getReg())
12473 .
addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12474 }
else if (
MI.getOpcode() == PPC::TCHECK_RET) {
12480 MI.getOperand(0).getReg())
12482 }
else if (
MI.getOpcode() == PPC::TBEGIN_RET) {
12484 unsigned Imm =
MI.getOperand(1).getImm();
12487 MI.getOperand(0).getReg())
12489 }
else if (
MI.getOpcode() == PPC::SETRNDi) {
12491 Register OldFPSCRReg =
MI.getOperand(0).getReg();
12505 unsigned Mode =
MI.getOperand(1).getImm();
12513 }
else if (
MI.getOpcode() == PPC::SETRND) {
12521 auto copyRegFromG8RCOrF8RC = [&] (
unsigned DestReg,
unsigned SrcReg) {
12527 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12530 if (RC == &PPC::F8RCRegClass) {
12533 "Unsupported RegClass.");
12535 StoreOp = PPC::STFD;
12540 (RegInfo.
getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12541 "Unsupported RegClass.");
12574 Register OldFPSCRReg =
MI.getOperand(0).getReg();
12591 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12599 BuildMI(*
BB,
MI, dl,
TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12613 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12622 }
else if (
MI.getOpcode() == PPC::SETFLM) {
12626 Register OldFPSCRReg =
MI.getOperand(0).getReg();
12630 Register NewFPSCRReg =
MI.getOperand(1).getReg();
12636 }
else if (
MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12637 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12639 }
else if (
MI.getOpcode() == PPC::SPLIT_QUADWORD) {
12646 .
addUse(Src, 0, PPC::sub_gp8_x1);
12649 .
addUse(Src, 0, PPC::sub_gp8_x0);
12654 MI.eraseFromParent();
12667 int RefinementSteps = Subtarget.
hasRecipPrec() ? 1 : 3;
12670 return RefinementSteps;
12676 EVT VT =
Op.getValueType();
12703 PPCTargetLowering::getSqrtResultForDenormInput(
SDValue Op,
12706 EVT VT =
Op.getValueType();
12715 int Enabled,
int &RefinementSteps,
12716 bool &UseOneConstNR,
12717 bool Reciprocal)
const {
12723 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12736 int &RefinementSteps)
const {
12742 if (RefinementSteps == ReciprocalEstimate::Unspecified)
12749 unsigned PPCTargetLowering::combineRepeatedFPDivisors()
const {
12787 unsigned Bytes,
int Dist,
12797 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12798 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12801 if (
FS != BFS ||
FS != (
int)Bytes)
return false;
12805 SDValue Base1 = Loc, Base2 = BaseLoc;
12806 int64_t Offset1 = 0, Offset2 = 0;
12809 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12819 if (isGA1 && isGA2 && GV1 == GV2)
12820 return Offset1 == (Offset2 + Dist*Bytes);
12827 unsigned Bytes,
int Dist,
12830 EVT VT =
LS->getMemoryVT();
12837 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
12838 default:
return false;
12839 case Intrinsic::ppc_altivec_lvx:
12840 case Intrinsic::ppc_altivec_lvxl:
12841 case Intrinsic::ppc_vsx_lxvw4x:
12842 case Intrinsic::ppc_vsx_lxvw4x_be:
12845 case Intrinsic::ppc_vsx_lxvd2x:
12846 case Intrinsic::ppc_vsx_lxvd2x_be:
12849 case Intrinsic::ppc_altivec_lvebx:
12852 case Intrinsic::ppc_altivec_lvehx:
12855 case Intrinsic::ppc_altivec_lvewx:
12865 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
12866 default:
return false;
12867 case Intrinsic::ppc_altivec_stvx:
12868 case Intrinsic::ppc_altivec_stvxl:
12869 case Intrinsic::ppc_vsx_stxvw4x:
12872 case Intrinsic::ppc_vsx_stxvd2x:
12875 case Intrinsic::ppc_vsx_stxvw4x_be:
12878 case Intrinsic::ppc_vsx_stxvd2x_be:
12881 case Intrinsic::ppc_altivec_stvebx:
12884 case Intrinsic::ppc_altivec_stvehx:
12887 case Intrinsic::ppc_altivec_stvewx:
12905 EVT VT =
LD->getMemoryVT();
12914 while (!Queue.empty()) {
12916 if (!Visited.
insert(ChainNext).second)
12919 if (
MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12923 if (!Visited.
count(ChainLD->getChain().getNode()))
12924 Queue.push_back(ChainLD->getChain().getNode());
12926 for (
const SDUse &
O : ChainNext->
ops())
12927 if (!Visited.
count(
O.getNode()))
12928 Queue.push_back(
O.getNode());
12930 LoadRoots.
insert(ChainNext);
12943 Queue.push_back(*
I);
12945 while (!Queue.empty()) {
12947 if (!Visited.
insert(LoadRoot).second)
12950 if (
MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12955 UE = LoadRoot->
use_end(); UI != UE; ++UI)
12956 if (((isa<MemSDNode>(*UI) &&
12957 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12959 Queue.push_back(*UI);
12992 auto Final = Shifted;
13003 DAGCombinerInfo &DCI)
const {
13011 if (!DCI.isAfterLegalizeDAG())
13017 UE =
N->use_end(); UI != UE; ++UI) {
13022 ISD::CondCode CC = cast<CondCodeSDNode>(
N->getOperand(2))->get();
13023 auto OpSize =
N->getOperand(0).getValueSizeInBits();
13027 if (OpSize <
Size) {
13045 DAGCombinerInfo &DCI)
const {
13063 if (
N->getOperand(0).getValueType() !=
MVT::i32 &&
13064 N->getOperand(0).getValueType() !=
MVT::i64)
13072 cast<CondCodeSDNode>(
N->getOperand(
13074 unsigned OpBits =
N->getOperand(0).getValueSizeInBits();
13085 return (
N->getOpcode() ==
ISD::SETCC ? ConvertSETCCToSubtract(
N, DCI)
13108 if (
N->getOperand(0).getOpcode() !=
ISD::AND &&
13109 N->getOperand(0).getOpcode() !=
ISD::OR &&
13110 N->getOperand(0).getOpcode() !=
ISD::XOR &&
13120 N->getOperand(1).getOpcode() !=
ISD::AND &&
13121 N->getOperand(1).getOpcode() !=
ISD::OR &&
13122 N->getOperand(1).getOpcode() !=
ISD::XOR &&
13135 for (
unsigned i = 0;
i < 2; ++
i) {
13139 N->getOperand(
i).getOperand(0).getValueType() ==
MVT::i1) ||
13140 isa<ConstantSDNode>(
N->getOperand(
i)))
13141 Inputs.push_back(
N->getOperand(
i));
13143 BinOps.push_back(
N->getOperand(
i));
13151 while (!BinOps.empty()) {
13157 PromOps.push_back(BinOp);
13193 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13194 if (isa<ConstantSDNode>(Inputs[
i]))
13198 UE = Inputs[
i].getNode()->use_end();
13220 for (
unsigned i = 0, ie = PromOps.size();
i != ie; ++
i) {
13222 UE = PromOps[
i].getNode()->use_end();
13245 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13248 if (isa<ConstantSDNode>(Inputs[
i]))
13254 std::list<HandleSDNode> PromOpHandles;
13255 for (
auto &PromOp : PromOps)
13256 PromOpHandles.emplace_back(PromOp);
13263 while (!PromOpHandles.empty()) {
13265 PromOpHandles.pop_back();
13271 if (!isa<ConstantSDNode>(PromOp.
getOperand(0)) &&
13274 PromOpHandles.emplace_front(PromOp);
13279 if (isa<ConstantSDNode>(RepValue))
13288 default:
C = 0;
break;
13293 if ((!isa<ConstantSDNode>(PromOp.
getOperand(
C)) &&
13301 PromOpHandles.emplace_front(PromOp);
13309 for (
unsigned i = 0;
i < 2; ++
i)
13310 if (isa<ConstantSDNode>(Ops[
C+
i]))
13319 return N->getOperand(0);
13327 DAGCombinerInfo &DCI)
const {
13353 if (
N->getOperand(0).getOpcode() !=
ISD::AND &&
13354 N->getOperand(0).getOpcode() !=
ISD::OR &&
13355 N->getOperand(0).getOpcode() !=
ISD::XOR &&
13366 while (!BinOps.empty()) {
13372 PromOps.push_back(BinOp);
13405 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13406 if (isa<ConstantSDNode>(Inputs[
i]))
13410 UE = Inputs[
i].getNode()->use_end();
13420 SelectTruncOp[0].
insert(std::make_pair(
User,
13424 SelectTruncOp[0].
insert(std::make_pair(
User,
13427 SelectTruncOp[1].
insert(std::make_pair(
User,
13433 for (
unsigned i = 0, ie = PromOps.size();
i != ie; ++
i) {
13435 UE = PromOps[
i].getNode()->use_end();
13445 SelectTruncOp[0].
insert(std::make_pair(
User,
13449 SelectTruncOp[0].
insert(std::make_pair(
User,
13452 SelectTruncOp[1].
insert(std::make_pair(
User,
13458 unsigned PromBits =
N->getOperand(0).getValueSizeInBits();
13459 bool ReallyNeedsExt =
false;
13463 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13464 if (isa<ConstantSDNode>(Inputs[
i]))
13468 Inputs[
i].getOperand(0).getValueSizeInBits();
13469 assert(PromBits < OpBits &&
"Truncation not to a smaller bit count?");
13474 OpBits-PromBits))) ||
13477 (OpBits-(PromBits-1)))) {
13478 ReallyNeedsExt =
true;
13486 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13490 if (isa<ConstantSDNode>(Inputs[
i]))
13493 SDValue InSrc = Inputs[
i].getOperand(0);
13507 std::list<HandleSDNode> PromOpHandles;
13508 for (
auto &PromOp : PromOps)
13509 PromOpHandles.emplace_back(PromOp);
13515 while (!PromOpHandles.empty()) {
13517 PromOpHandles.pop_back();
13521 default:
C = 0;
break;
13526 if ((!isa<ConstantSDNode>(PromOp.
getOperand(
C)) &&
13534 PromOpHandles.emplace_front(PromOp);
13544 (SelectTruncOp[1].count(PromOp.
getNode()) &&
13546 PromOpHandles.emplace_front(PromOp);
13555 for (
unsigned i = 0;
i < 2; ++
i) {
13556 if (!isa<ConstantSDNode>(Ops[
C+
i]))
13573 auto SI0 = SelectTruncOp[0].
find(PromOp.
getNode());
13574 if (SI0 != SelectTruncOp[0].
end())
13576 auto SI1 = SelectTruncOp[1].
find(PromOp.
getNode());
13577 if (SI1 != SelectTruncOp[1].
end())
13586 if (!ReallyNeedsExt)
13587 return N->getOperand(0);
13594 N->getValueSizeInBits(0), PromBits),
13595 dl,
N->getValueType(0)));
13598 "Invalid extension type");
13601 DAG.
getConstant(
N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13609 DAGCombinerInfo &DCI)
const {
13611 "Should be called with a SETCC node");
13613 ISD::CondCode CC = cast<CondCodeSDNode>(
N->getOperand(2))->get();
13629 EVT VT =
N->getValueType(0);
13636 return DAGCombineTruncBoolExt(
N, DCI);
13655 combineElementTruncationToVectorTruncation(
SDNode *
N,
13656 DAGCombinerInfo &DCI)
const {
13658 "Should be called with a BUILD_VECTOR node");
13663 SDValue FirstInput =
N->getOperand(0);
13665 "The input operand must be an fp-to-int conversion.");
13674 bool IsSplat =
true;
13679 EVT TargetVT =
N->getValueType(0);
13680 for (
int i = 0,
e =
N->getNumOperands();
i <
e; ++
i) {
13685 if (NextConversion != FirstConversion)
13693 if (
N->getOperand(
i) != FirstInput)
13704 for (
int i = 0,
e =
N->getNumOperands();
i <
e; ++
i) {
13710 Ops.push_back(DAG.
getUNDEF(SrcVT));
13715 Ops.push_back(Trunc);
13718 Ops.push_back(
In.isUndef() ? DAG.
getUNDEF(SrcVT) :
In.getOperand(0));
13730 return DAG.
getNode(Opcode, dl, TargetVT, BV);
13743 "Should be called with a BUILD_VECTOR node");
13748 if (!
N->getValueType(0).getVectorElementType().isByteSized())
13751 bool InputsAreConsecutiveLoads =
true;
13752 bool InputsAreReverseConsecutive =
true;
13753 unsigned ElemSize =
N->getValueType(0).getScalarType().getStoreSize();
13754 SDValue FirstInput =
N->getOperand(0);
13755 bool IsRoundOfExtLoad =
false;
13764 N->getNumOperands() == 1)
13767 for (
int i = 1,
e =
N->getNumOperands();
i <
e; ++
i) {
13769 if (IsRoundOfExtLoad &&
N->getOperand(
i).getOpcode() !=
ISD::FP_ROUND)
13772 SDValue NextInput = IsRoundOfExtLoad ?
N->getOperand(
i).getOperand(0) :
13778 IsRoundOfExtLoad ?
N->getOperand(
i-1).getOperand(0) :
N->getOperand(
i-1);
13779 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13780 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13787 InputsAreConsecutiveLoads =
false;
13789 InputsAreReverseConsecutive =
false;
13792 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13796 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13797 "The loads cannot be both consecutive and reverse consecutive.");
13800 IsRoundOfExtLoad ? FirstInput.
getOperand(0) : FirstInput;
13802 IsRoundOfExtLoad ?
N->getOperand(
N->getNumOperands()-1).getOperand(0) :
13803 N->getOperand(
N->getNumOperands()-1);
13805 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13807 if (InputsAreConsecutiveLoads) {
13808 assert(LD1 &&
"Input needs to be a LoadSDNode.");
13813 if (InputsAreReverseConsecutive) {
13814 assert(
LDL &&
"Input needs to be a LoadSDNode.");
13816 LDL->getBasePtr(),
LDL->getPointerInfo(),
13817 LDL->getAlignment());
13819 for (
int i =
N->getNumOperands() - 1;
i >= 0;
i--)
13823 DAG.
getUNDEF(
N->getValueType(0)), Ops);
13832 SDValue Input, uint64_t Elems,
13833 uint64_t CorrectElems) {
13842 for (
unsigned i = 0;
i <
N->getNumOperands();
i++) {
13844 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13846 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13847 CorrectElems = CorrectElems >> 8;
13848 Elems = Elems >> 8;
13853 DAG.
getUNDEF(Input.getValueType()), ShuffleMask);
13855 EVT VT =
N->getValueType(0);
13859 Input.getValueType().getVectorElementType(),
13881 uint64_t TargetElems[] = {
13889 uint64_t Elems = 0;
13893 auto isSExtOfVecExtract = [&](
SDValue Op) ->
bool {
13913 if (Input && Input != Extract.
getOperand(0))
13919 Elems = Elems << 8;
13928 for (
unsigned i = 0;
i <
N->getNumOperands();
i++) {
13929 if (!isSExtOfVecExtract(
N->getOperand(
i))) {
13936 int TgtElemArrayIdx;
13938 int OutputSize =
N->getValueType(0).getScalarSizeInBits();
13939 if (InputSize + OutputSize == 40)
13940 TgtElemArrayIdx = 0;
13941 else if (InputSize + OutputSize == 72)
13942 TgtElemArrayIdx = 1;
13943 else if (InputSize + OutputSize == 48)
13944 TgtElemArrayIdx = 2;
13945 else if (InputSize + OutputSize == 80)
13946 TgtElemArrayIdx = 3;
13947 else if (InputSize + OutputSize == 96)
13948 TgtElemArrayIdx = 4;
13952 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13954 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13955 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13956 if (Elems != CorrectElems) {
13975 SDValue Operand =
N->getOperand(0);
13981 auto *
LD = cast<LoadSDNode>(Operand);
13990 if (!ValidLDType ||
13996 LD->getChain(),
LD->getBasePtr(),
14005 DAGCombinerInfo &DCI)
const {
14007 "Should be called with a BUILD_VECTOR node");
14012 if (!Subtarget.
hasVSX())
14018 SDValue FirstInput =
N->getOperand(0);
14020 SDValue Reduced = combineElementTruncationToVectorTruncation(
N, DCI);
14035 if (Subtarget.
hasP9Altivec() && !DCI.isBeforeLegalize()) {
14061 if (FirstInput.
getOpcode() !=
N->getOperand(1).getOpcode())
14065 SDValue Ext2 =
N->getOperand(1).getOperand(0);
14072 if (!Ext1Op || !Ext2Op)
14081 if (FirstElem == 0 && SecondElem == 1)
14083 else if (FirstElem == 2 && SecondElem == 3)
14096 DAGCombinerInfo &DCI)
const {
14099 "Need an int -> FP conversion node here");
14112 if (!
Op.getOperand(0).getValueType().isSimple())
14114 if (
Op.getOperand(0).getValueType().getSimpleVT() <=
MVT(
MVT::i1) ||
14115 Op.getOperand(0).getValueType().getSimpleVT() >
MVT(
MVT::i64))
14118 SDValue FirstOperand(
Op.getOperand(0));
14119 bool SubWordLoad = FirstOperand.getOpcode() ==
ISD::LOAD &&
14120 (FirstOperand.getValueType() ==
MVT::i8 ||
14121 FirstOperand.getValueType() ==
MVT::i16);
14124 bool DstDouble =
Op.getValueType() ==
MVT::f64;
14125 unsigned ConvOp =
Signed ?
14131 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14139 SDValue ExtOps[] = { Ld, WidthConst };
14151 if (
Op.getOperand(0).getValueType() ==
MVT::i32)
14155 "UINT_TO_FP is supported only with FPCVT");
14173 SDValue Src =
Op.getOperand(0).getOperand(0);
14176 DCI.AddToWorklist(Src.
getNode());
14192 DCI.AddToWorklist(FP.
getNode());
14211 switch (
N->getOpcode()) {
14216 Chain =
LD->getChain();
14217 Base =
LD->getBasePtr();
14218 MMO =
LD->getMemOperand();
14237 MVT VecTy =
N->getValueType(0).getSimpleVT();
14252 Chain =
Load.getValue(1);
14280 switch (
N->getOpcode()) {
14285 Chain =
ST->getChain();
14286 Base =
ST->getBasePtr();
14287 MMO =
ST->getMemOperand();
14307 SDValue Src =
N->getOperand(SrcOpnd);
14330 StoreOps, VecTy, MMO);
14337 DAGCombinerInfo &DCI)
const {
14341 unsigned Opcode =
N->getOperand(1).getOpcode();
14344 &&
"Not a FP_TO_INT Instruction!");
14346 SDValue Val =
N->getOperand(1).getOperand(0);
14347 EVT Op1VT =
N->getOperand(1).getValueType();
14354 bool ValidTypeForStoreFltAsInt =
14362 cast<StoreSDNode>(
N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14368 DCI.AddToWorklist(Val.
getNode());
14376 Val = DAG.
getNode(ConvOpcode,
14378 DCI.AddToWorklist(Val.
getNode());
14382 SDValue Ops[] = {
N->getOperand(0), Val,
N->getOperand(2),
14388 cast<StoreSDNode>(
N)->getMemoryVT(),
14389 cast<StoreSDNode>(
N)->getMemOperand());
14391 DCI.AddToWorklist(Val.
getNode());
14398 bool PrevElemFromFirstVec =
Mask[0] < NumElts;
14399 for (
int i = 1,
e =
Mask.size();
i <
e;
i++) {
14400 if (PrevElemFromFirstVec &&
Mask[
i] < NumElts)
14402 if (!PrevElemFromFirstVec &&
Mask[
i] >= NumElts)
14404 PrevElemFromFirstVec = !PrevElemFromFirstVec;
14415 for (
int i = 0,
e =
Op.getNumOperands();
i <
e;
i++) {
14416 FirstOp =
Op.getOperand(
i);
14422 for (
int i = 1,
e =
Op.getNumOperands();
i <
e;
i++)
14423 if (
Op.getOperand(
i) != FirstOp && !
Op.getOperand(
i).isUndef())
14433 Op =
Op.getOperand(0);
14448 int LHSMaxIdx,
int RHSMinIdx,
14449 int RHSMaxIdx,
int HalfVec,
14450 unsigned ValidLaneWidth,
14452 for (
int i = 0,
e = ShuffV.size();
i <
e;
i++) {
14453 int Idx = ShuffV[
i];
14454 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14456 Subtarget.
isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
14467 SDLoc dl(OrigSToV);
14470 "Expecting a SCALAR_TO_VECTOR here");
14483 "Cannot produce a permuted scalar_to_vector for one element vector");
14485 unsigned ResultInElt = NumElts / 2;
14532 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14541 if (SToVLHS || SToVRHS) {
14544 int NumEltsOut = ShuffV.size();
14549 unsigned ValidLaneWidth =
14557 int LHSMaxIdx = -1;
14558 int RHSMinIdx = -1;
14559 int RHSMaxIdx = -1;
14572 LHSMaxIdx = NumEltsOut / NumEltsIn;
14581 RHSMinIdx = NumEltsOut;
14582 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14595 HalfVec, ValidLaneWidth, Subtarget);
14600 if (!isa<ShuffleVectorSDNode>(Res))
14602 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14605 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
14621 if (IsLittleEndian) {
14624 if (
Mask[0] < NumElts)
14625 for (
int i = 1,
e =
Mask.size();
i <
e;
i += 2)
14626 ShuffV[
i] = (ShuffV[
i - 1] + NumElts);
14630 for (
int i = 0,
e =
Mask.size();
i <
e;
i += 2)
14631 ShuffV[
i] = (ShuffV[
i + 1] + NumElts);
14635 if (
Mask[0] < NumElts)
14636 for (
int i = 0,
e =
Mask.size();
i <
e;
i += 2)
14637 ShuffV[
i] = ShuffV[
i + 1] - NumElts;
14641 for (
int i = 1,
e =
Mask.size();
i <
e;
i += 2)
14642 ShuffV[
i] = ShuffV[
i - 1] - NumElts;
14648 cast<BuildVectorSDNode>(TheSplat.
getNode())->getSplatValue();
14651 if (IsLittleEndian)
14660 DAGCombinerInfo &DCI)
const {
14662 "Not a reverse memop pattern!");
14667 auto I =
Mask.rbegin();
14668 auto E =
Mask.rend();
14670 for (;
I !=
E; ++
I) {
14690 if(!IsElementReverse(SVN))
14731 switch (
N->getOpcode()) {
14734 return combineADD(
N, DCI);
14736 return combineSHL(
N, DCI);
14738 return combineSRA(
N, DCI);
14740 return combineSRL(
N, DCI);
14742 return combineMUL(
N, DCI);
14745 return combineFMALike(
N, DCI);
14748 return N->getOperand(0);
14752 return N->getOperand(0);
14756 if (
C->isNullValue() ||
14757 C->isAllOnesValue())
14758 return N->getOperand(0);
14764 return DAGCombineExtBoolTrunc(
N, DCI);
14766 return combineTRUNCATE(
N, DCI);
14768 if (
SDValue CSCC = combineSetCC(
N, DCI))
14772 return DAGCombineTruncBoolExt(
N, DCI);
14775 return combineFPToIntToFP(
N, DCI);
14778 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(
N->getOperand(0));
14779 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(
N), LSBase, DCI);
14781 return combineVectorShuffle(cast<ShuffleVectorSDNode>(
N), DCI.
DAG);
14784 EVT Op1VT =
N->getOperand(1).getValueType();
14785 unsigned Opcode =
N->getOperand(1).getOpcode();
14788 SDValue Val= combineStoreFPToInt(
N, DCI);
14795 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(
N), DCI);
14801 if (cast<StoreSDNode>(
N)->isUnindexed() && Opcode ==
ISD::BSWAP &&
14802 N->getOperand(1).getNode()->hasOneUse() &&
14808 EVT mVT = cast<StoreSDNode>(
N)->getMemoryVT();
14812 SDValue BSwapOp =
N->getOperand(1).getOperand(0);
14819 if (Op1VT.
bitsGT(mVT)) {
14829 N->getOperand(0), BSwapOp,
N->getOperand(2), DAG.
getValueType(mVT)
14833 Ops, cast<StoreSDNode>(
N)->getMemoryVT(),
14834 cast<StoreSDNode>(
N)->getMemOperand());
14840 isa<ConstantSDNode>(
N->getOperand(1)) && Op1VT ==
MVT::i32) {
14842 EVT MemVT = cast<StoreSDNode>(
N)->getMemoryVT();
14852 cast<StoreSDNode>(
N)->setTruncatingStore(
true);
14869 EVT VT =
LD->getValueType(0);
14888 auto ReplaceTwoFloatLoad = [&]() {
14904 if (!
LD->hasNUsesOfValue(2, 0))
14907 auto UI =
LD->use_begin();
14908 while (UI.getUse().getResNo() != 0) ++UI;
14910 while (UI.getUse().getResNo() != 0) ++UI;
14911 SDNode *RightShift = *UI;
14920 !isa<ConstantSDNode>(RightShift->
getOperand(1)) ||
14948 if (
LD->isIndexed()) {
14950 "Non-pre-inc AM on PPC?");
14959 LD->getPointerInfo(),
LD->getAlignment(),
14960 MMOFlags,
LD->getAAInfo());
14966 LD->getPointerInfo().getWithOffset(4),
14967 MinAlign(
LD->getAlignment(), 4), MMOFlags,
LD->getAAInfo());
14969 if (
LD->isIndexed()) {
14983 if (ReplaceTwoFloatLoad())
14986 EVT MemVT =
LD->getMemoryVT();
14995 LD->getAlign() < ABIAlignment) {
15026 MVT PermCntlTy, PermTy, LDTy;
15027 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15028 : Intrinsic::ppc_altivec_lvsl;
15029 IntrLD = Intrinsic::ppc_altivec_lvx;
15030 IntrPerm = Intrinsic::ppc_altivec_vperm;
15051 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15055 BaseLoadOps, LDTy, BaseMMO);
15064 int IncValue = IncOffset;
15081 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15085 ExtraLoadOps, LDTy, ExtraMMO);
15096 if (isLittleEndian)
15098 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15101 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15120 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
15122 : Intrinsic::ppc_altivec_lvsl);
15123 if (IID ==
Intr &&
N->getOperand(1)->getOpcode() ==
ISD::ADD) {
15130 .
zext(
Add.getScalarValueSizeInBits()))) {
15131 SDNode *BasePtr =
Add->getOperand(0).getNode();
15133 UE = BasePtr->use_end();
15136 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
15147 if (isa<ConstantSDNode>(
Add->getOperand(1))) {
15148 SDNode *BasePtr =
Add->getOperand(0).getNode();
15150 UE = BasePtr->use_end(); UI != UE; ++UI) {
15151 if (UI->getOpcode() ==
ISD::ADD &&
15152 isa<ConstantSDNode>(UI->getOperand(1)) &&
15153 (cast<ConstantSDNode>(
Add->getOperand(1))->getZExtValue() -
15154 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15155 (1ULL <<
Bits) == 0) {
15160 cast<ConstantSDNode>(
VI->getOperand(0))->getZExtValue() == IID) {
15172 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15173 IID == Intrinsic::ppc_altivec_vmaxsh ||
15174 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15190 V2.getOperand(1) == V1) {
15208 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
15211 case Intrinsic::ppc_vsx_lxvw4x:
15212 case Intrinsic::ppc_vsx_lxvd2x:
15221 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
15224 case Intrinsic::ppc_vsx_stxvw4x:
15225 case Intrinsic::ppc_vsx_stxvd2x:
15234 bool Is64BitBswapOn64BitTgt =
15237 N->getOperand(0).hasOneUse();
15238 if (IsSingleUseNormalLd &&
15240 (Subtarget.
hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15253 Ops,
LD->getMemoryVT(),
LD->getMemOperand());
15274 !IsSingleUseNormalLd)
15279 if (!
LD->isSimple())
15283 LD->getPointerInfo(),
LD->getAlignment());
15288 LD->getMemOperand(), 4, 4);
15298 Hi.getOperand(0).getValue(1),
Lo.getOperand(0).getValue(1));
15307 if (!
N->getOperand(0).hasOneUse() &&
15308 !
N->getOperand(1).hasOneUse() &&
15309 !
N->getOperand(2).hasOneUse()) {
15312 SDNode *VCMPrecNode =
nullptr;
15314 SDNode *LHSN =
N->getOperand(0).getNode();
15318 UI->getOperand(1) ==
N->getOperand(1) &&
15319 UI->getOperand(2) ==
N->getOperand(2) &&
15320 UI->getOperand(0) ==
N->getOperand(0)) {
15333 SDNode *FlagUser =
nullptr;
15335 FlagUser ==
nullptr; ++UI) {
15336 assert(UI != VCMPrecNode->
use_end() &&
"Didn't find user!");
15349 return SDValue(VCMPrecNode, 0);
15357 cast<ConstantSDNode>(
Cond.getOperand(1))->getZExtValue() ==
15358 Intrinsic::loop_decrement) {
15364 "Counter decrement has more than one use");
15376 ISD::CondCode CC = cast<CondCodeSDNode>(
N->getOperand(1))->get();
15377 SDValue LHS =
N->getOperand(2), RHS =
N->getOperand(3);
15384 Intrinsic::loop_decrement &&
15390 cast<ConstantSDNode>(LHS.
getOperand(1))->getZExtValue() ==
15391 Intrinsic::loop_decrement &&
15392 isa<ConstantSDNode>(RHS)) {
15394 "Counter decrement comparison is not EQ or NE");
15396 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15404 "Counter decrement has more than one use");
15407 N->getOperand(0),
N->getOperand(4));
15416 assert(isDot &&
"Can't compare against a vector result!");
15420 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15421 if (Val != 0 && Val != 1) {
15423 return N->getOperand(0);
15426 N->getOperand(0),
N->getOperand(4));
15429 bool BranchOnWhenPredTrue = (CC ==
ISD::SETEQ) ^ (Val == 0);
15442 switch (cast<ConstantSDNode>(LHS.
getOperand(1))->getZExtValue()) {
15461 N->getOperand(4), CompNode.
getValue(1));
15466 return DAGCombineBuildVector(
N, DCI);
15468 return combineABS(
N, DCI);
15470 return combineVSelect(
N, DCI);
15481 EVT VT =
N->getValueType(0);
15485 !(Divisor.
isPowerOf2() || (-Divisor).isPowerOf2()))
15491 bool IsNegPow2 = (-Divisor).isPowerOf2();
15496 Created.push_back(
Op.getNode());
15500 Created.push_back(
Op.getNode());
15512 const APInt &DemandedElts,
15514 unsigned Depth)
const {
15516 switch (
Op.getOpcode()) {
15520 if (cast<VTSDNode>(
Op.getOperand(2))->getVT() ==
MVT::i16)
15521 Known.
Zero = 0xFFFF0000;
15525 switch (cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue()) {
15527 case Intrinsic::ppc_altivec_vcmpbfp_p:
15528 case Intrinsic::ppc_altivec_vcmpeqfp_p:
15529 case Intrinsic::ppc_altivec_vcmpequb_p:
15530 case Intrinsic::ppc_altivec_vcmpequh_p:
15531 case Intrinsic::ppc_altivec_vcmpequw_p:
15532 case Intrinsic::ppc_altivec_vcmpequd_p:
15533 case Intrinsic::ppc_altivec_vcmpequq_p:
15534 case Intrinsic::ppc_altivec_vcmpgefp_p:
15535 case Intrinsic::ppc_altivec_vcmpgtfp_p:
15536 case Intrinsic::ppc_altivec_vcmpgtsb_p:
15537 case Intrinsic::ppc_altivec_vcmpgtsh_p:
15538 case Intrinsic::ppc_altivec_vcmpgtsw_p:
15539 case Intrinsic::ppc_altivec_vcmpgtsd_p:
15540 case Intrinsic::ppc_altivec_vcmpgtsq_p:
15541 case Intrinsic::ppc_altivec_vcmpgtub_p:
15542 case Intrinsic::ppc_altivec_vcmpgtuh_p:
15543 case Intrinsic::ppc_altivec_vcmpgtuw_p:
15544 case Intrinsic::ppc_altivec_vcmpgtud_p:
15545 case Intrinsic::ppc_altivec_vcmpgtuq_p:
15583 uint64_t LoopSize = 0;
15585 for (
auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15586 LoopSize +=
TII->getInstSizeInBytes(*J);
15591 if (LoopSize > 16 && LoopSize <= 32)
15605 if (Constraint.
size() == 1) {
15606 switch (Constraint[0]) {
15624 }
else if (Constraint ==
"wc") {
15626 }
else if (Constraint ==
"wa" || Constraint ==
"wd" ||
15627 Constraint ==
"wf" || Constraint ==
"ws" ||
15628 Constraint ==
"wi" || Constraint ==
"ww") {
15641 Value *CallOperandVal =
info.CallOperandVal;
15644 if (!CallOperandVal)
15651 else if ((
StringRef(constraint) ==
"wa" ||
15654 type->isVectorTy())
15656 else if (
StringRef(constraint) ==
"wi" &&
type->isIntegerTy(64))
15658 else if (
StringRef(constraint) ==
"ws" &&
type->isDoubleTy())
15660 else if (
StringRef(constraint) ==
"ww" &&
type->isFloatTy())
15663 switch (*constraint) {
15668 if (
type->isIntegerTy())
15672 if (
type->isFloatTy())
15676 if (
type->isDoubleTy())
15680 if (
type->isVectorTy())
15693 std::pair<unsigned, const TargetRegisterClass *>
15697 if (Constraint.
size() == 1) {
15699 switch (Constraint[0]) {
15702 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15703 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15706 return std::make_pair(0U, &PPC::G8RCRegClass);
15707 return std::make_pair(0U, &PPC::GPRCRegClass);
15713 if (Subtarget.
hasSPE()) {
15715 return std::make_pair(0U, &PPC::GPRCRegClass);
15717 return std::make_pair(0U, &PPC::SPERCRegClass);
15720 return std::make_pair(0U, &PPC::F4RCRegClass);
15722 return std::make_pair(0U, &PPC::F8RCRegClass);
15727 return std::make_pair(0U, &PPC::VRRCRegClass);
15730 return std::make_pair(0U, &PPC::CRRCRegClass);
15732 }
else if (Constraint ==
"wc" && Subtarget.
useCRBits()) {
15734 return std::make_pair(0U, &PPC::CRBITRCRegClass);
15735 }
else if ((Constraint ==
"wa" || Constraint ==
"wd" ||
15736 Constraint ==
"wf" || Constraint ==
"wi") &&
15741 return std::make_pair(0U, &PPC::VSRCRegClass);
15743 return std::make_pair(0U, &PPC::VSSRCRegClass);
15744 return std::make_pair(0U, &PPC::VSFRCRegClass);
15745 }
else if ((Constraint ==
"ws" || Constraint ==
"ww") && Subtarget.
hasVSX()) {
15747 return std::make_pair(0U, &PPC::VSSRCRegClass);
15749 return std::make_pair(0U, &PPC::VSFRCRegClass);
15750 }
else if (Constraint ==
"lr") {
15752 return std::make_pair(0U, &PPC::LR8RCRegClass);
15754 return std::make_pair(0U, &PPC::LRRCRegClass);
15759 if (Constraint[0] ==
'{' && Constraint[Constraint.
size() - 1] ==
'}') {
15763 if (Constraint.
size() > 3 && Constraint[1] ==
'v' && Constraint[2] ==
's') {
15764 int VSNum = atoi(Constraint.
data() + 3);
15765 assert(VSNum >= 0 && VSNum <= 63 &&
15766 "Attempted to access a vsr out of range");
15768 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15769 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15774 if (Constraint.
size() > 3 && Constraint[1] ==
'f') {
15775 int RegNum = atoi(Constraint.
data() + 2);
15776 if (RegNum > 31 || RegNum < 0)
15779 return Subtarget.
hasSPE()
15780 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
15781 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
15783 return Subtarget.
hasSPE()
15784 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
15785 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
15789 std::pair<unsigned, const TargetRegisterClass *>
R =
15799 PPC::GPRCRegClass.contains(
R.first))
15801 PPC::sub_32, &PPC::G8RCRegClass),
15802 &PPC::G8RCRegClass);
15805 if (!
R.second &&
StringRef(
"{cc}").equals_insensitive(Constraint)) {
15806 R.first = PPC::CR0;
15807 R.second = &PPC::CRRCRegClass;
15811 if (Subtarget.
isAIXABI() && !
TM.getAIXExtendedAltivecABI()) {
15812 if (((
R.first >= PPC::V20 &&
R.first <= PPC::V31) ||
15813 (
R.first >= PPC::VF20 &&
R.first <= PPC::VF31)) &&
15814 (
R.second == &PPC::VSRCRegClass ||
R.second == &PPC::VSFRCRegClass))
15815 errs() <<
"warning: vector registers 20 to 32 are reserved in the "
15816 "default AIX AltiVec ABI and cannot be used\n";
15825 std::string &Constraint,
15826 std::vector<SDValue>&Ops,
15831 if (Constraint.length() > 1)
return;
15833 char Letter = Constraint[0];
15857 if (isShiftedUInt<16, 16>(
Value))
15861 if (isShiftedInt<16, 16>(
Value))
15889 if (Result.getNode()) {
15890 Ops.push_back(Result);
// --- isLegalAddressingMode (fragment): only certain scales are legal ---
  switch (AM.Scale) {
  // ... (scale cases elided) ...
  }

// --- LowerRETURNADDR (fragment) ---
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  bool isPPC64 = Subtarget.isPPC64();
  // Depth > 0: walk the frame chain starting at the frame address.
  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
  // Depth == 0: just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);

// --- LowerFRAMEADDR (fragment) ---
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  // Naked functions never have a frame pointer, so use r1/x1; everything else
  // uses the dedicated frame-pointer register.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

// --- (fragment of a subsequent lowering helper) ---
  bool isPPC64 = Subtarget.isPPC64();

// --- isAccessedAsGotIndirect (fragment): jump tables and block addresses
//     are always accessed through the GOT/TOC ---
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;
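/// getTgtMemIntrinsic - Describe the memory behavior of target intrinsics so
/// MachineMemOperands can be attached to them: the quadword atomics touch a
/// full i128, while the AltiVec/VSX load and store intrinsics are modeled
/// with a conservative byte range because lvx/stvx ignore the low address
/// bits.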
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_atomicrmw_xchg_i128:
  case Intrinsic::ppc_atomicrmw_add_i128:
  case Intrinsic::ppc_atomicrmw_sub_i128:
  case Intrinsic::ppc_atomicrmw_nand_i128:
  case Intrinsic::ppc_atomicrmw_and_i128:
  case Intrinsic::ppc_atomicrmw_or_i128:
  case Intrinsic::ppc_atomicrmw_xor_i128:
  case Intrinsic::ppc_cmpxchg_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    // ... (offset, 16-byte alignment, and volatile load/store flags elided) ...
    return true;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll: {
    // Pick the in-memory type from the element-load variants; everything else
    // is a full 16-byte vector.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    // ... (conservative offset/size and MOLoad flags elided) ...
    return true;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll: {
    // Same element-type selection as the loads above.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    // ... (conservative offset/size and MOStore flags elided) ...
    return true;
  }
  default:
    break;
  }
  return false;
}
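// Note (based on the AltiVec semantics of lvx/stvx, which clear the low four
// address bits): the MachineMemOperand above is given a conservative byte
// range around the pointer rather than an exact [p, p+16) interval.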
// --- shouldConvertConstantLoadToIntImm (fragment): ints of 1..64 bits ---
  return !(BitSize == 0 || BitSize > 64);
// --- isTruncateFree overloads (fragments): free only from i64 to i32 ---
  return NumBits1 == 64 && NumBits2 == 32;
  return NumBits1 == 64 && NumBits2 == 32;
// --- isZExtFree (fragment): a zext folds into a narrow scalar load ---
  EVT MemVT = LD->getMemoryVT();
// --- isFPExtFree (fragment) ---
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
                                                       MachineMemOperand::Flags,
                                                       bool *Fast) const {
  // ... (scalar checks elided) ...
  if (Subtarget.hasVSX()) {
    // Only vector types with a direct VSX load/store may be misaligned.
    // ... (details elided) ...
  }
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    if (!ConstNode->getAPIntValue().isSignedIntN(64))
      return false;
    // Skip constants that ISel already multiplies in one or two instructions:
    // a 16-bit immediate after stripping trailing zeros needs at most a
    // mulli plus a shift.
    int64_t Imm = ConstNode->getSExtValue();
    unsigned Shift = countTrailingZeros<uint64_t>(Imm);
    Imm >>= Shift;
    if (isInt<16>(Imm))
      return false;
    uint64_t UImm = static_cast<uint64_t>(Imm);
    // Decompose only the 2^N +/- 1 shapes that combineMUL handles.
    if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
        isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
      return true;
  }
  return false;
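// Example: multiplying by 10 shifts down to 5, which fits 16 bits, so it is
// left to ISel (mulli); multiplying by 65537 is decomposed, since 65537 - 1
// is a power of two and a shift+add sequence is cheaper.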
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();
  assert(User && "A single use instruction with no uses.");

  switch (I->getOpcode()) {
  case Instruction::FMul: {
    // Don't break FMA: PowerPC prefers keeping the multiply next to its
    // add/sub user.
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)
      return true;
    // ... (fast-math and type-legality checks elided) ...
  }
  // ... (remaining opcodes elided) ...
  }
  return true;
}
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR and CTR are clobbered at call sites, so list them (plus X12) as
  // scratch registers for stackmaps and patchpoints.
  static const MCPhysReg ScratchRegs[] = {PPC::X12, PPC::LR8, PPC::CTR8, 0};
  return ScratchRegs;
}
Register PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}

Register PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}

bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Direct moves avoid stack operations.
  if (Subtarget.hasVSX())
    return true;
  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
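// getNegatedExpression folds an fneg into PPCISD::FNMSUB by negating the
// cheaper of the two multiplicands, since fnmsub(-a, b, -c) == -fnmsub(a, b, c)
// whenever the sign of zero is not significant.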
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Opc) {
  case PPCISD::FNMSUB:
    // ... (legality checks, operand/loc setup, and negation of the addend N2
    //      into NegN2/N2Cost elided) ...
    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // Try to negate whichever of N0 and N1 is cheaper.
    SDValue NegN0 = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                         OptForSize, N0Cost, Depth + 1);
    SDValue NegN1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOps,
                                         OptForSize, N1Cost, Depth + 1);
    if (NegN0 && N0Cost <= N1Cost) {
      Cost = std::min(N0Cost, N2Cost);
      return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
    } else if (NegN1) {
      Cost = std::min(N1Cost, N2Cost);
      return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
    }
    break;
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
// --- isFPImmLegal (fragment): signature tail ---
//     ... (const APFloat &Imm, EVT VT, bool ForCodeSize) const { ...

// --- stripModuloOnShift (fragment): drop a redundant modulo mask from a
//     vector shift amount ---
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;
  // ... (map ISD::SHL/SRL/SRA onto the PPCISD target opcodes; elided) ...
  // Vector shifts already operate modulo the element width, so an AND with
  // (element bits - 1) on the shift amount is a no-op:
  if (Mask->getZExtValue() == OpSizeInBits - 1)
    return DAG.getNode(TargetOpcode, SDLoc(N), N->getValueType(0),
                       N->getOperand(0), N->getOperand(1).getOperand(0));
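// Example: for a v4i32 shift, (shl x, (and y, splat(31))) and (shl x, y)
// agree in every lane, so the mask (OpSizeInBits - 1 == 31) can be removed.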
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (SDValue Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;
  // ... (SHL-specific combines elided) ...
}

SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  if (SDValue Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;
  return SDValue();
}

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (SDValue Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;
  // ... (SRL-specific combines elided) ...
}
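// combineADDToADDZE (fragment below): rewrites (add X, (zext (setcc Z, C,
// ne/eq))) into a carry-based sequence, e.g. for SETNE with -C in the addi
// range:
//   add X, (zext (setne Z, C)) --> addze X, (addic (addi Z, -C), -1).carry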
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    // Match (zext (setcc %x, C, cc)) where -C fits the addi immediate field.
    // ... (opcode and type checks elided) ...
    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // addi takes a 16-bit signed immediate, so -C must be in range.
      return isInt<16>(NegConstant);
    }
    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // Canonicalize the zext-of-compare operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  // ... (operand extraction elided) ...
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch (cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  case ISD::SETNE: {
    // addze X, (addic (addi Z, -C), -1).carry; when C == 0 the addi is
    // dropped and Z is used directly.
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // ... (node construction elided) ...
  }
  case ISD::SETEQ: {
    // addze X, (subfic (addi Z, -C), 0).carry, with the same C == 0 shortcut.
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // ... (node construction elided) ...
  }
  }
// --- combineADDToMAT_PCREL_ADDR (fragment): fold a constant addend into a
//     PC-relative materialization; paddi allows a 34-bit displacement ---
  if (!GSDN || !ConstNode)
    return SDValue();
  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
  if (!isInt<34>(NewOffset))
    return SDValue();

SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  if (SDValue Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
    return Value;
  if (SDValue Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
    return Value;
  return SDValue();
}
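// combineTRUNCATE detects a truncate of an i128 bitcast of an f128 value
// (optionally through a 64-bit shift) and extracts the selected doubleword
// directly, avoiding a store and partial reload of the f128.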
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  // Looking for a truncate of i128 to i64.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i64)
    return SDValue();
  // ... (match a bitcast of f128 below Op0, possibly through a 64-bit SRL;
  //      the unshifted case uses DCI.DAG.getTargetConstant(0, dl, MVT::i32)
  //      as its element index; elided) ...

  // Pick which doubleword of the f128 the truncate selects; a shift right by
  // 64 flips the choice.
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
  if (Op0.getOpcode() == ISD::SRL)
    EltToExtract = EltToExtract ? 0 : 1;

  // ... (Bitcast = the f128 operand recast as v2i64; elided) ...
  return DCI.DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
      DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
}
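// combineMUL strength-reduces multiplies by 2^N +/- 1 into shift+add/sub
// when the subtarget's latencies make that profitable:
//   (mul x, 2^N + 1) => (add (shl x, N), x)
//   (mul x, 2^N - 1) => (sub (shl x, N), x)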
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // ... (minsize bail-out elided) ...

  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    // ... (per-CPU latency table elided) ...
    // The 3-instruction form (mul x, -(2^N + 1)) => -(add (shl x, N), x)
    // only pays off for vectors, where a multiply is comparatively slower.
    return IsAddOne && IsNeg ? VT.isVector() : true;
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();
    // ... (node construction elided) ...
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();
    // ... (node construction elided) ...
  } else {
    return SDValue();
  }
}
// Combine FMA-like nodes (e.g. PPCISD::FNMSUB) with negated operands; this
// runs in the combiner because SDNode flags and subtarget features matter.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // ... (operand extraction elided) ...
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opc = N->getOpcode();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  // ... (negation folding elided) ...
}
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // ... (ABI and calling-convention checks elided) ...
}

bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;
  // ... (per-type checks elided) ...
}

bool PPCTargetLowering::isMaskAndCmp0FoldingBeneficial(
    const Instruction &AndI) const {
  // Folding is beneficial when the mask fits a single record-form
  // andi./andis., which sets CR0 as a by-product.
  ConstantInt *CI = dyn_cast<ConstantInt>(AndI.getOperand(1));
  if (!CI)
    return true;
  if (CI->getBitWidth() > 64)
    return false;
  int64_t ConstVal = CI->getZExtValue();
  return isUInt<16>(ConstVal) ||
         (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
}
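// Example: a mask of 0x00FF0000 passes (0xFF fits 16 bits after the shift and
// the low halfword is zero), so andis. covers it; 0x00FF00FF fails both tests
// and is not considered foldable here.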
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((Subtarget.hasP9Altivec() && (N->getOpcode() == ISD::ABS)) &&
         "Only combine this when P9 altivec supported!");
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // abs(sub(a, b)) becomes a single vabsd when both inputs are known
    // non-negative as signed integers, due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For v4i32 the combine works for arbitrary inputs via a sign flip
    // (xvnegsp); the flag operand 1 requests it.
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}
// Transform vselect(setcc(a, b, unsigned-cmp), sub(a, b), sub(b, a)) and its
// symmetric forms into a single vabsd node for v4i32/v8i16/v16i8.
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();
  // ... (pattern checks elided; CmpOpnd1/CmpOpnd2 are the setcc operands) ...
  return DAG.getNode(PPCISD::VABSD, dl, VT, CmpOpnd1, CmpOpnd2,
                     DAG.getTargetConstant(0, dl, MVT::i32));
}
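// The remainder of the file implements the addressing-mode selection used by
// load/store ISel: memory-operand flags (MOF) are computed for a node and
// then mapped to the best instruction format (D-Form, DS-Form, DQ-Form, or
// X-Form) via the AddrModesMap built in initializeAddrModeMap().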
PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
  // Pick the first format whose required flags are all present; unaligned
  // D-Forms are tried first, then the aligned DS-/DQ-Forms.
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DSForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DQForm;
  return PPC::AM_XForm; // Fallback when nothing more specific matches.
}

// setAlignFlagsForFI (fragment): frame-index bases refine the
// multiple-of-4/16 displacement flags by the frame object's alignment.
  if ((FrameIndexAlign % 4) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
  // A plain FrameIndex (no added immediate) sets them positively instead:
  if ((FrameIndexAlign % 4) == 0)
    FlagSet |= PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) == 0)
    FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

  // An ADD (or an OR that provably behaves as one) with a constant operand
  // sets the 16-/34-bit displacement flags:
  //   const APInt &ConstImm = CN->getAPIntValue();
  // and a bare constant address is classified the same way. One of the
  // remaining guards reads:
  //   !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue()
  // ... (details elided) ...
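// Example: an immediate of 0x20 satisfies both (0x20 & 0x3) == 0 and
// (0x20 & 0xf) == 0, so a displacement of 32 is marked as a valid
// multiple-of-4 (DS-Form) and multiple-of-16 (DQ-Form) offset.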
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
                                           SelectionDAG &DAG) const {
  unsigned FlagSet = PPC::MOF_None;
  // ... (subtarget feature flags elided) ...
  // Don't handle indexed (pre-increment) accesses here.
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
    if (LSB->isIndexed())
      return PPC::MOF_None;

  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
  assert(MN && "Parent should be a MemSDNode!");
  // Classify the access by the type being loaded or stored.
  EVT MemVT = MN->getMemoryVT();
  unsigned Size = MemVT.getSizeInBits();
  if (MemVT.isScalarInteger()) {
    assert(Size <= 64 &&
           "Not expecting scalar integers larger than 8 bytes!");
    if (Size < 32)
      FlagSet |= PPC::MOF_SubWordInt;
    else if (Size == 32)
      FlagSet |= PPC::MOF_WordInt;
    else
      FlagSet |= PPC::MOF_DoubleWordInt;
  } else if (MemVT.isVector()) {
    if (Size == 128)
      FlagSet |= PPC::MOF_Vector;
    else if (Size == 256)
      FlagSet |= PPC::MOF_Vector256;
  }
  // ... (floating-point classification elided) ...
  // Loads also contribute extension-kind flags.
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
    // ... (sext/zext flag selection elided) ...
  }

  // Without prefixed instructions, a 32-bit-signed constant address cannot
  // use the prefixed forms and is treated as neither add nor constant:
  bool IsNonP1034BitConst =
      ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
       FlagSet) == PPC::MOF_AddrIsSImm32;
  if ((N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
       !isa<ConstantSDNode>(N)) ||
      IsNonP1034BitConst)
    FlagSet |= PPC::MOF_NotAddNorCst;

  return FlagSet;
}
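/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [reg+reg] operation (an X-Form instruction),
/// avoiding a constant materialization just to feed the index register when
/// the address is already an add.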
PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
                                                      SDValue &Base,
                                                      SelectionDAG &DAG) const {
  PPC::AddrMode Mode = PPC::AM_XForm;
  int16_t ForceXFormImm = 0;
  if (provablyDisjointOr(DAG, N) &&
      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // If the address is an add, keep the implicit add of the addressing mode,
  // but only drop the explicit add when doing so does not force a constant
  // to be materialized solely for the index register.
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // Otherwise, pair the whole address with the zero register.
  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Base = N;
  return Mode;
}
// --- SelectOptimalAddrMode (fragments) ---
// Compute address flags for N, map them to the optimal mode, then populate
// Base and Disp accordingly.
  unsigned Flags = computeMOFlags(Parent, N, DAG);
  // ... (Mode = getAddrModeForFlags(Flags); per-mode dispatch elided) ...

  // Addresses that are not frame indexes take the generic path:
  if (!isa<FrameIndexSDNode>(N)) {
    // ... (elided) ...
  }

  // D-Form: register base plus a 16-bit signed immediate displacement.
  int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();

  // (add x, (PPCISD::Lo sym, 0)) addresses split around the Lo node:
  Disp = N.getOperand(1).getOperand(0);
  Base = N.getOperand(0);

  // A bare constant address is materialized from its 32-bit value:
  auto *CN = cast<ConstantSDNode>(N);
  EVT CNType = CN->getValueType(0);
  uint64_t CNImm = CN->getZExtValue();
  int32_t Addr = (int32_t)CNImm;
  // ... (high/low displacement construction elided) ...

  // X-Form: for frame indexes the whole node is the base register; otherwise
  // operand 1 is.
  Base = FI ? N : N.getOperand(1);
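// Quadword (i128) atomic operations are lowered to the ppc_atomicrmw_*_i128 /
// ppc_cmpxchg_i128 intrinsics; the helpers below split 128-bit values into
// 64-bit halves for the underlying quadword load-and-reserve /
// store-conditional sequence.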
// --- calling-convention query (fragment): signature tail ---
//     ... bool IsVarArg) const { ...

static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  default: llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg: return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:  return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:  return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:  return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:   return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:  return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand: return Intrinsic::ppc_atomicrmw_nand_i128;
  }
}
17541 "Only support quadword now");
17543 Type *ValTy = cast<PointerType>(AlignedAddr->
getType())->getElementType();
17548 Value *IncrLo =
Builder.CreateTrunc(Incr, Int64Ty,
"incr_lo");
17550 Builder.CreateTrunc(
Builder.CreateLShr(Incr, 64), Int64Ty,
"incr_hi");
17566 "Only support quadword now");
17568 Type *ValTy = cast<PointerType>(AlignedAddr->
getType())->getElementType();
17573 Value *CmpLo =
Builder.CreateTrunc(CmpVal, Int64Ty,
"cmp_lo");
17575 Builder.CreateTrunc(
Builder.CreateLShr(CmpVal, 64), Int64Ty,
"cmp_hi");
17576 Value *NewLo =
Builder.CreateTrunc(NewVal, Int64Ty,
"new_lo");
17578 Builder.CreateTrunc(
Builder.CreateLShr(NewVal, 64), Int64Ty,
"new_hi");
17583 Builder.CreateCall(IntCmpXchg, {
Addr, CmpLo, CmpHi, NewLo, NewHi});
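// The {lo, hi} pair returned by ppc_cmpxchg_i128 is the value observed in
// memory; recombined above, it serves both as the loaded result and as the
// basis for the success check in the expanded cmpxchg sequence.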